mirror of https://github.com/apache/lucene.git
Merge branch 'main' into java_21
Commit 40c03b0e6c

@@ -117,6 +117,9 @@ apply from: file('buildSrc/scriptDepVersions.gradle')

apply from: file('gradle/generation/local-settings.gradle')

// Make sure the build environment is consistent.
apply from: file('gradle/validation/check-environment.gradle')

// IDE support, settings and specials.
apply from: file('gradle/ide/intellij-idea.gradle')
apply from: file('gradle/ide/eclipse.gradle')

@@ -38,3 +38,9 @@ dependencies {
  implementation "commons-codec:commons-codec:${scriptDepVersions['commons-codec']}"
}

if (!rootProject.hasJavaFlightRecorder) {
  logger.warn('Module jdk.jfr is not available; skipping compilation of Java Flight Recorder support.')
  tasks.named('compileJava').configure {
    exclude('**/ProfileResults.java')
  }
}

@@ -24,7 +24,7 @@ ext {
    "apache-rat": "0.14",
    "asm": "9.6",
    "commons-codec": "1.13",
    "ecj": "3.36.0-SNAPSHOT",
    "ecj": "3.36.0",
    "flexmark": "0.61.24",
    "javacc": "7.0.12",
    "jflex": "1.8.2",

@ -15,20 +15,18 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.gradle.ProfileResults;
|
||||
|
||||
def recordings = files()
|
||||
|
||||
allprojects {
|
||||
plugins.withType(JavaPlugin) {
|
||||
ext {
|
||||
testOptions += [
|
||||
[propName: 'tests.profile', value: false, description: "Enable java flight recorder profiling."]
|
||||
[propName: 'tests.profile', value: false, description: "Enable Java Flight Recorder profiling."]
|
||||
]
|
||||
}
|
||||
|
||||
if (resolvedTestOption("tests.profile").toBoolean()) {
|
||||
allprojects {
|
||||
if (rootProject.hasJavaFlightRecorder) {
|
||||
tasks.withType(Test) {
|
||||
jvmArgs("-XX:StartFlightRecording=dumponexit=true,maxsize=250M,settings=" + rootProject.file("gradle/testing/profiling.jfc"),
|
||||
"-XX:+UnlockDiagnosticVMOptions",
|
||||
|
@ -41,6 +39,8 @@ allprojects {
|
|||
recordings = recordings.plus fileTree(dir: workingDir, include: '*.jfr')
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new GradleException('Module jdk.jfr is not available; Java Flight Recorder profiles cannot be enabled.')
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -48,10 +48,11 @@ allprojects {
|
|||
|
||||
gradle.buildFinished {
|
||||
if (!recordings.isEmpty()) {
|
||||
ProfileResults.printReport(recordings.getFiles().collect { it.toString() },
|
||||
propertyOrDefault(ProfileResults.MODE_KEY, ProfileResults.MODE_DEFAULT) as String,
|
||||
Integer.parseInt(propertyOrDefault(ProfileResults.STACKSIZE_KEY, ProfileResults.STACKSIZE_DEFAULT)),
|
||||
Integer.parseInt(propertyOrDefault(ProfileResults.COUNT_KEY, ProfileResults.COUNT_DEFAULT)),
|
||||
Boolean.parseBoolean(propertyOrDefault(ProfileResults.LINENUMBERS_KEY, ProfileResults.LINENUMBERS_DEFAULT)))
|
||||
def pr = org.apache.lucene.gradle.ProfileResults;
|
||||
pr.printReport(recordings.getFiles().collect { it.toString() },
|
||||
propertyOrDefault(pr.MODE_KEY, pr.MODE_DEFAULT) as String,
|
||||
Integer.parseInt(propertyOrDefault(pr.STACKSIZE_KEY, pr.STACKSIZE_DEFAULT)),
|
||||
Integer.parseInt(propertyOrDefault(pr.COUNT_KEY, pr.COUNT_DEFAULT)),
|
||||
Boolean.parseBoolean(propertyOrDefault(pr.LINENUMBERS_KEY, pr.LINENUMBERS_DEFAULT)))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,8 +23,6 @@ grant {
|
|||
// jetty-specific:
|
||||
permission java.lang.RuntimePermission "getenv.JETTY_AVAILABLE_PROCESSORS";
|
||||
permission java.lang.RuntimePermission "getenv.JETTY_WORKER_INSTANCE";
|
||||
// servlet stuff
|
||||
permission java.lang.RuntimePermission "setContextClassLoader";
|
||||
// allow TestNRTReplication fork its jvm
|
||||
permission java.io.FilePermission "${java.home}${/}-", "read,execute";
|
||||
// read/write access to all system properties (required by jetty in these tests)
|
||||
|
|
|
@ -50,14 +50,11 @@ grant {
|
|||
permission java.lang.RuntimePermission "getStackTrace";
|
||||
// needed for mock filesystems in tests
|
||||
permission java.lang.RuntimePermission "fileSystemProvider";
|
||||
// analyzers/uima: needed by lucene expressions' JavascriptCompiler
|
||||
permission java.lang.RuntimePermission "createClassLoader";
|
||||
// needed to test unmap hack on platforms that support it
|
||||
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
|
||||
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
|
||||
// needed by cyberneko usage by benchmarks on J9
|
||||
permission java.lang.RuntimePermission "accessClassInPackage.org.apache.xerces.util";
|
||||
permission java.lang.RuntimePermission "getClassLoader";
|
||||
|
||||
// Needed for loading native library (lucene:misc:native) in lucene:misc
|
||||
permission java.lang.RuntimePermission "getFileStoreAttributes";
|
||||
|
@ -111,6 +108,8 @@ grant {
|
|||
permission java.lang.RuntimePermission "shutdownHooks";
|
||||
// needed by jacoco to instrument classes
|
||||
permission java.lang.RuntimePermission "defineClass";
|
||||
// needed by jacoco for God knows what.
|
||||
permission java.lang.RuntimePermission "createClassLoader";
|
||||
};
|
||||
|
||||
// Grant all permissions to Gradle test runner classes.
|
||||
|
|
|
@@ -23,6 +23,7 @@ import org.gradle.util.GradleVersion
configure(rootProject) {
  ext {
    expectedGradleVersion = '8.4'
    hasJavaFlightRecorder = ModuleLayer.boot().findModule('jdk.jfr').map(this.class.module::canRead).orElse(false)
  }

  wrapper {

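The `hasJavaFlightRecorder` property above probes the boot module layer for the `jdk.jfr` module. A minimal stand-alone Java sketch of the same check (the `JfrProbe` class name is illustrative):

```java
// Sketch of the jdk.jfr availability probe used above (plain Java, outside Gradle).
public class JfrProbe {
  public static void main(String[] args) {
    boolean hasJavaFlightRecorder =
        ModuleLayer.boot()
            .findModule("jdk.jfr")
            // can our own module read jdk.jfr?
            .map(m -> JfrProbe.class.getModule().canRead(m))
            .orElse(false);
    System.out.println("jdk.jfr available: " + hasJavaFlightRecorder);
  }
}
```
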
@ -17,8 +17,8 @@
|
|||
|
||||
def skipReason
|
||||
|
||||
if (rootProject.usesAltJvm && rootProject.runtimeJavaVersion > JavaVersion.VERSION_15) {
|
||||
skipReason = "won't work with JDK ${rootProject.runtimeJavaVersion} if used as alternative java toolchain"
|
||||
if (rootProject.usesAltJvm) {
|
||||
skipReason = "won't work with alternative java toolchain"
|
||||
}
|
||||
|
||||
if (!propertyOrDefault("validation.errorprone", isCIBuild).asBoolean()) {
|
||||
|
@ -37,7 +37,7 @@ if (skipReason) {
|
|||
|
||||
allprojects { prj ->
|
||||
plugins.withType(JavaPlugin) {
|
||||
// LUCENE-9650: Errorprone on master/gradle does not work with JDK-16+ when running as plugin
|
||||
// LUCENE-9650: Errorprone on master/gradle does not work when running as plugin
|
||||
// inside a forked Javac process. Javac running inside Gradle works, because we have
|
||||
// additional module system opens in place.
|
||||
// This is a hack to keep the dependency (so that palantir's version check doesn't complain)
|
||||
|
|
|
@ -59,6 +59,9 @@ allprojects {
|
|||
}
|
||||
|
||||
subprojects {
|
||||
// initialize empty, because no checks for benchmark-jmh module.
|
||||
ext.jarInfos = []
|
||||
|
||||
// Configure jarValidation configuration for all projects. Any dependency
|
||||
// declared on this configuration (or any configuration it extends from) will
|
||||
// be verified.
|
||||
|
|
|
@ -61,6 +61,7 @@ Otherwise you are stuck wrestling down full dependencies of OpenJDK (metal etc)
|
|||
Also you must run benchmarks as root to use dtrace, but it works.
|
||||
|
||||
$ git clone --depth 1 https://github.com/openjdk/jdk/
|
||||
$ curl -f https://ftp.gnu.org/gnu/binutils/binutils-2.38.tar.gz | tar -zxf -
|
||||
$ curl -fo jdk/src/utils/hsdis/binutils/Makefile https://raw.githubusercontent.com/openjdk/jdk/3c7ae1225f0d5575fd927a9b76fb40dc30e208cd/src/utils/hsdis/Makefile
|
||||
$ vi jdk/src/utils/hsdis/binutils/Makefile, change SOURCE = hsdis.c to SOURCE = hsdis-binutils.c
|
||||
$ vi jdk/src/utils/hsdis/binutils/hsdis-binutils.c, change #include "hsdis.h" to #include "../hsdis.h"
|
||||
|
|
|
@ -7,7 +7,6 @@ http://s.apache.org/luceneversions
|
|||
|
||||
API Changes
|
||||
---------------------
|
||||
|
||||
* LUCENE-12092: Remove deprecated UTF8TaxonomyWriterCache. Please use LruTaxonomyWriterCache
|
||||
instead. (Vigya Sharma)
|
||||
|
||||
|
@ -62,10 +61,21 @@ API Changes
|
|||
|
||||
* GITHUB#12599: Add RandomAccessInput#readBytes method to the RandomAccessInput interface. (Ignacio Vera)
|
||||
|
||||
* GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
|
||||
of the two (Anh Dung Bui)
|
||||
* GITHUB#11023: Adding -level param to CheckIndex, making the old -fast param the default behaviour. (Jakub Slowinski)
|
||||
|
||||
* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
|
||||
* GITHUB#12873: Expressions module now uses MethodHandles to define custom functions. Support for
|
||||
custom classloaders was removed. (Uwe Schindler)
|
||||
|
||||
* GITHUB#12243: Remove TermInSetQuery ctors taking varargs param. SortedSetDocValuesField#newSlowSetQuery,
|
||||
SortedDocValuesField#newSlowSetQuery, KeywordField#newSetQuery, KeywordField#newSetQuery now take a collection. (Jakub Slowinski)
|
||||
|
||||
* GITHUB#12881: Performance improvements to MatchHighlighter and MatchRegionRetriever. MatchRegionRetriever can be
|
||||
configured to not load matches (or content) of certain fields and to force-load other fields so that stored fields
|
||||
of a document are accessed once. A configurable limit of field matches placed in the priority queue was added
|
||||
(allows handling long fields with lots of hits more gracefully). MatchRegionRetriever utilizes IndexSearcher's
|
||||
executor to extract hit offsets concurrently. (Dawid Weiss)
|
||||
|
||||
* GITHUB#12855: Remove deprecated DrillSideways#createDrillDownFacetsCollector extension method. (Greg Miller)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
@ -89,18 +99,17 @@ Improvements
|
|||
|
||||
* GITHUB#12447: Hunspell: speed up the dictionary enumeration on suggestion (Peter Gromov)
|
||||
|
||||
* GITHUB#12542: FSTCompiler can now approximately limit how much RAM it uses to share
  suffixes during FST construction using the suffixRAMLimitMB method. Larger values
  result in a more minimal FST (more common suffixes are shared). Pass
  Double.POSITIVE_INFINITY to use as much RAM as is needed to create a purely
  minimal FST. Inspired by this Rust FST implementation:
  https://blog.burntsushi.net/transducers (Mike McCandless)
|
||||
* GITHUB#12873: Expressions module now uses JEP 371 "Hidden Classes" with JEP 309
|
||||
"Dynamic Class-File Constants" to implement Javascript expressions. (Uwe Schindler)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
|
||||
* GITHUB#11857, GITHUB#11859, GITHUB#11893, GITHUB#11909: Hunspell: improved suggestion performance (Peter Gromov)
|
||||
|
||||
* GITHUB#12825, GITHUB#12834: Hunspell: improved dictionary loading performance, allowed in-memory entry sorting.
|
||||
(Peter Gromov)
|
||||
|
||||
* GITHUB#12372: Reduce allocation during HNSW construction (Jonathan Ellis)
|
||||
|
||||
* GITHUB#12408: Lazy initialization improvements for Facets implementations when there are segments with no hits
|
||||
|
@ -116,6 +125,9 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12220: Hunspell: disallow hidden title-case entries from compound middle/end
|
||||
|
||||
* GITHUB#12878: Fix the declared Exceptions of Expression#evaluate() to match those
|
||||
of DoubleValues#doubleValue(). (Uwe Schindler)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
||||
|
@ -142,6 +154,48 @@ Other
|
|||
|
||||
* GITHUB#12239: Hunspell: reduced suggestion set dependency on the hash table order (Peter Gromov)
|
||||
|
||||
* GITHUB#9049: Fixing bug in UnescapedCharSequence#toStringEscaped() (Jakub Slowinski)
|
||||
|
||||
======================== Lucene 9.10.0 =======================
|
||||
|
||||
API Changes
|
||||
---------------------
|
||||
* GITHUB#12243: Mark TermInSetQuery ctors with varargs terms as @Deprecated. SortedSetDocValuesField#newSlowSetQuery,
|
||||
SortedDocValuesField#newSlowSetQuery, KeywordField#newSetQuery now take a collection of terms as a param. (Jakub Slowinski)
|
||||
|
||||
* GITHUB#11041: Deprecate IndexSearch#search(Query, Collector) in favor of
|
||||
IndexSearcher#search(Query, CollectorManager) for TopFieldCollectorManager
|
||||
and TopScoreDocCollectorManager. (Zach Chen, Adrien Grand, Michael McCandless, Greg Miller, Luca Cavanna)
|
||||
|
||||
* GITHUB#12854: Mark DrillSideways#createDrillDownFacetsCollector as @Deprecated. (Greg Miller)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
Improvements
|
||||
---------------------
|
||||
|
||||
* GITHUB#12870: Tighten synchronized loop in DirectoryTaxonomyReader#getOrdinal. (Stefan Vodita)
|
||||
|
||||
* GITHUB#12812: Avoid overflows and false negatives in int slice buffer filled-with-zeros assertion. (Stefan Vodita)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
(No changes)
|
||||
|
||||
Bug Fixes
|
||||
---------------------
|
||||
* GITHUB#12866: Prevent extra similarity computation for single-level HNSW graphs. (Kaival Parikh)
|
||||
|
||||
* GITHUB#12558: Ensure #finish is called on all drill-sideways FacetsCollectors even when no hits are scored.
|
||||
(Greg Miller)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
||||
* GITHUB#11023: Removing some dead code in CheckIndex. (Jakub Slowinski)
|
||||
|
||||
======================== Lucene 9.9.0 =======================
|
||||
|
||||
API Changes
|
||||
|
@ -157,9 +211,6 @@ API Changes
|
|||
* GITHUB#12592: Add RandomAccessInput#length method to the RandomAccessInput interface. In addition deprecate
|
||||
ByteBuffersDataInput#size in favour of this new method. (Ignacio Vera)
|
||||
|
||||
* GITHUB#12646, GITHUB#12690: Move FST#addNode to FSTCompiler to avoid a circular dependency
|
||||
between FST and FSTCompiler (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12718: Make IndexSearcher#getSlices final as it is not expected to be overridden (Luca Cavanna)
|
||||
|
||||
* GITHUB#12427: Automata#makeStringUnion #makeBinaryStringUnion now accept Iterable<BytesRef> instead of
|
||||
|
@ -169,6 +220,25 @@ API Changes
|
|||
* GITHUB#12180: Add TaxonomyReader#getBulkOrdinals method to more efficiently retrieve facet ordinals for multiple
|
||||
FacetLabel at once. (Egor Potemkin)
|
||||
|
||||
* GITHUB#12816: Add HumanReadableQuery which takes a description parameter for debugging purposes. (Jakub Slowinski)
|
||||
|
||||
* GITHUB#12646, GITHUB#12690: Move FST#addNode to FSTCompiler to avoid a circular dependency
|
||||
between FST and FSTCompiler (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12709: Consolidate FSTStore and BytesStore in FST. Created FSTReader which contains the common methods
|
||||
of the two (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12735: Remove FSTCompiler#getTermCount() and FSTCompiler.UnCompiledNode#inputCount (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12695: Remove public constructor of FSTCompiler. Please use FSTCompiler.Builder
  instead. (Juan M. Caicedo)
|
||||
|
||||
* GITHUB#12799: Make TaskExecutor constructor public and use TaskExecutor for concurrent
|
||||
HNSW graph build. (Shubham Chaudhary)
|
||||
|
||||
* GITHUB#12758, GITHUB#12803: Remove FST constructor with DataInput for metadata. Please
|
||||
use the constructor with FSTMetadata instead. (Anh Dung Bui)
|
||||
|
||||
New Features
|
||||
---------------------
|
||||
|
||||
|
@ -180,7 +250,7 @@ New Features
|
|||
|
||||
* GITHUB#12582: Add int8 scalar quantization to the HNSW vector format. This optionally allows for more compact lossy
|
||||
storage for the vectors, requiring about 75% memory for fast HNSW search. (Ben Trent)
|
||||
|
||||
|
||||
* GITHUB#12660: HNSW graphs can now be merged with multiple threads. Configurable in Lucene99HnswVectorsFormat.
  (Patrick Zhai)
|
||||
|
||||
|
@ -225,6 +295,22 @@ Improvements
|
|||
* GITHUB#12754: Refactor lookup of Hotspot VM options and do not initialize constants with NULL
|
||||
if SecurityManager prevents access. (Uwe Schindler)
|
||||
|
||||
* GITHUB#12801: Remove possible contention on a ReentrantReadWriteLock in
|
||||
Monitor which could result in searches waiting for commits. (Davis Cook)
|
||||
|
||||
* GITHUB#11277, LUCENE-10241: Upgrade to OpenNLP to 1.9.4. (Jeff Zemerick)
|
||||
|
||||
* GITHUB#12542: FSTCompiler can now approximately limit how much RAM it uses to share
  suffixes during FST construction using the suffixRAMLimitMB method. Larger values
  result in a more minimal FST (more common suffixes are shared). Pass
  Double.POSITIVE_INFINITY to use as much RAM as is needed to create a purely
  minimal FST. Inspired by this Rust FST implementation:
  https://blog.burntsushi.net/transducers (Mike McCandless)
|
||||
|
||||
* GITHUB#12738: NodeHash now stores the FST nodes data instead of just node addresses (Anh Dung Bui)
|
||||
|
||||
* GITHUB#12847: Test2BFST now reports the time it took to build the FST and the real FST size (Anh Dung Bui)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
* GITHUB#12183: Make TermStates#build concurrent. (Shubham Chaudhary)
|
||||
|
@ -276,10 +362,14 @@ Optimizations
|
|||
|
||||
* GITHUB#12381: Skip docs with DocValues in NumericLeafComparator. (Lu Xugang, Adrien Grand)
|
||||
|
||||
* GITHUB#12748: Specialize arc store for continuous label in FST. (Guo Feng, Chao Zhang)
|
||||
|
||||
* GITHUB#12784: Cache buckets to speed up BytesRefHash#sort. (Guo Feng)
|
||||
|
||||
* GITHUB#12806: Utilize exact kNN search when gathering k >= numVectors in a segment (Ben Trent)
|
||||
|
||||
* GITHUB#12782: Use group-varint encoding for the tail of postings. (Adrien Grand, Zhang Chao)
|
||||
|
||||
* GITHUB#12748: Specialize arc store for continuous label in FST. (Guo Feng, Zhang Chao)
|
||||
|
||||
Changes in runtime behavior
|
||||
---------------------
|
||||
|
||||
|
@ -311,22 +401,33 @@ Bug Fixes
|
|||
|
||||
* GITHUB#12770: Stop exploring HNSW graph if scores are not getting better. (Ben Trent)
|
||||
|
||||
* GITHUB#12640: Ensure #finish is called on all drill-sideways collectors even if one throws a
|
||||
CollectionTerminatedException (Greg Miller)
|
||||
|
||||
* GITHUB#12626: Fix segmentInfos replace to set userData (Shibi Balamurugan, Uwe Schindler, Marcus Eagan, Michael Froh)
|
||||
|
||||
Build
|
||||
---------------------
|
||||
|
||||
* GITHUB#12752: tests.multiplier could be omitted in test failure reproduce lines (esp. in
|
||||
nightly mode). (Dawid Weiss)
|
||||
|
||||
* GITHUB#12742: JavaCompile tasks may be in up-to-date state when modular dependencies have changed
  leading to odd runtime errors (Chris Hostetter, Dawid Weiss)
|
||||
|
||||
* GITHUB#12612: Upgrade forbiddenapis to version 3.6 and ASM for APIJAR extraction to 9.6. (Uwe Schindler)
|
||||
|
||||
* GITHUB#12655: Upgrade to Gradle 8.4 (Kevin Risden)
|
||||
|
||||
* GITHUB#12845: Only enable support for tests.profile if the jdk.jfr module is available
  in the Gradle runtime. (Uwe Schindler)
|
||||
|
||||
Other
|
||||
---------------------
|
||||
|
||||
* GITHUB#12817: Add demo for faceting with StringValueFacetCounts over KeywordField and SortedDocValuesField.
|
||||
(Stefan Vodita)
|
||||
|
||||
* GITHUB#12657: Internal refactor of HNSW graph merging (Ben Trent).
|
||||
|
||||
* GITHUB#12625: Refactor ByteBlockPool so it is just a "shift/mask big array". (Ignacio Vera)
|
||||
|
@ -336,6 +437,8 @@ Other
|
|||
overflows and slices that are too large. Some bits of code are simplified. Documentation is updated and expanded.
|
||||
(Stefan Vodita)
|
||||
|
||||
* GITHUB#12762: Refactor BKD HeapPointWriter to hide the internal data structure. (Ignacio Vera)
|
||||
|
||||
======================== Lucene 9.8.0 =======================
|
||||
|
||||
API Changes
|
||||
|
@ -364,6 +467,8 @@ New Features
|
|||
* GITHUB#12479: Add new Maximum Inner Product vector similarity function for non-normalized dot-product
|
||||
vector search. (Jack Mazanec, Ben Trent)
|
||||
|
||||
* GITHUB#12525: `WordDelimiterGraphFilterFactory` now supports the `ignoreKeywords` flag (Thomas De Craemer)
|
||||
|
||||
* GITHUB#12489: Add support for recursive graph bisection, also called
|
||||
bipartite graph partitioning, and often abbreviated BP, an algorithm for
|
||||
reordering doc IDs that results in more compact postings and faster queries,
|
||||
|
@ -386,7 +491,7 @@ Improvements
|
|||
Optimizations
|
||||
---------------------
|
||||
|
||||
* GITHUB#12377: Avoid redundant loop for compute min value in DirectMonotonicWriter. (Chao Zhang)
|
||||
* GITHUB#12377: Avoid redundant loop for compute min value in DirectMonotonicWriter. (Zhang Chao)
|
||||
|
||||
* GITHUB#12361: Faster top-level disjunctions sorted by descending score.
|
||||
(Adrien Grand)
|
||||
|
@ -401,7 +506,7 @@ Optimizations
|
|||
|
||||
* GITHUB#12385: Restore parallel knn query rewrite across segments rather than slices (Luca Cavanna)
|
||||
|
||||
* GITHUB#12381: Speed up NumericDocValuesWriter with index sorting. (Chao Zhang)
|
||||
* GITHUB#12381: Speed up NumericDocValuesWriter with index sorting. (Zhang Chao)
|
||||
|
||||
* GITHUB#12453: Faster bulk numeric reads from BufferedIndexInput (Armin Braun)
|
||||
|
||||
|
@ -468,7 +573,7 @@ Other
|
|||
* GITHUB#12428: Replace consecutive close() calls and close() calls with null checks with IOUtils.close().
|
||||
(Shubham Chaudhary)
|
||||
|
||||
* GITHUB#12512: Remove unused variable in BKDWriter. (Chao Zhang)
|
||||
* GITHUB#12512: Remove unused variable in BKDWriter. (Zhang Chao)
|
||||
|
||||
======================== Lucene 9.7.0 =======================
|
||||
|
||||
|
|
|
@@ -19,6 +19,11 @@

## Migration from Lucene 9.x to Lucene 10.0

### Minor API changes in MatchHighlighter and MatchRegionRetriever (GITHUB#12881)

The API of the interfaces for accepting highlights has changed to allow performance improvements. Look at the issue and the PR diff to get
a sense of what's changed (the changes are minor).

### Removed deprecated IndexSearcher.doc, IndexReader.document, IndexReader.getTermVectors (GITHUB#11998)

The deprecated stored fields and term vectors APIs relied upon thread-local storage and have been removed.
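A minimal sketch of the replacement `StoredFields`/`TermVectors` accessors, assuming an already-open `IndexReader` and a valid docID (the class and method names of the wrapper below are illustrative):

```java
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.TermVectors;

// Sketch: fetch stored fields and term vectors without the removed thread-local APIs.
final class StoredFieldsAccess {
  static Document load(IndexReader reader, int docID) throws IOException {
    StoredFields storedFields = reader.storedFields(); // replacement for IndexReader#document(int)
    Document doc = storedFields.document(docID);
    TermVectors termVectors = reader.termVectors(); // replacement for IndexReader#getTermVectors(int)
    Fields vectors = termVectors.get(docID); // may be null if no term vectors were indexed
    return doc;
  }
}
```
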

@@ -101,6 +106,34 @@ The deprecated getter for the `Executor` that was optionally provided to the `In
has been removed. Users that want to execute concurrent tasks should rely instead on the `TaskExecutor`
that the searcher holds, retrieved via `IndexSearcher#getTaskExecutor`.
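A minimal sketch, assuming `TaskExecutor#invokeAll` over a collection of callables (the tasks shown are placeholders):

```java
import java.io.IOException;
import java.util.List;
import java.util.concurrent.Callable;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TaskExecutor;

// Sketch: run concurrent work through the searcher's TaskExecutor instead of a raw Executor.
final class ConcurrentTasks {
  static List<Integer> run(IndexSearcher searcher) throws IOException {
    TaskExecutor taskExecutor = searcher.getTaskExecutor();
    // placeholder tasks; real code would do per-slice or per-segment work
    List<Callable<Integer>> tasks = List.of(() -> 1, () -> 2);
    return taskExecutor.invokeAll(tasks);
  }
}
```
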

### CheckIndex params -slow and -fast are deprecated, replaced by -level X (GITHUB#11023)

The former `-fast` behaviour of `CheckIndex` (performing checksum checks only) is now the default.
A new parameter, `-level X`, sets the detail level of the index check; the higher the value, the more checks are performed.
Sample `-level` values: `1` (default) - checksum checks only, `2` - all level 1 checks plus logical integrity checks, `3` - all
level 2 checks plus slow checks.
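A hedged sketch of running the checker with the new flag from Java; the index path is a placeholder, and `CheckIndex.main` is assumed to take the same arguments as the command-line tool:

```java
import org.apache.lucene.index.CheckIndex;

// Sketch: equivalent of `java org.apache.lucene.index.CheckIndex /path/to/index -level 2`.
final class RunCheckIndex {
  public static void main(String[] args) throws Exception {
    // note: CheckIndex.main may terminate the JVM with an exit code
    CheckIndex.main(new String[] {"/path/to/index", "-level", "2"});
  }
}
```
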

### Expressions module now uses `MethodHandle` and hidden classes (GITHUB#12873)

Custom functions in the expressions module must now be passed in a `Map` using `MethodHandle` as values.
To convert legacy code using maps of reflective `java.lang.reflect.Method`, use the converter method
`JavascriptCompiler#convertLegacyFunctions`. This should make the mapping mostly compatible.
The use of `MethodHandle` and [Dynamic Class-File Constants (JEP 309)](https://openjdk.org/jeps/309)
now also allows passing private methods or methods from different classloaders. It is also possible
to adapt guards or filters using the `MethodHandles` class.

The new implementation of the Javascript expressions compiler no longer supports the use of a custom
`ClassLoader`, because it uses the new JDK 15 feature [hidden classes (JEP 371)](https://openjdk.org/jeps/371).
Due to the use of `MethodHandle`, classloader isolation is no longer needed, because JS code can only call
method handles that were resolved by the application before using the expressions module.
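A minimal sketch of building such a map for one custom function; the class and function names are illustrative, and the exact `JavascriptCompiler.compile` overload that accepts the map is not shown here (see the module javadocs):

```java
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.util.Map;

// Sketch: expose a custom function to the expressions module as a MethodHandle.
final class CustomFunctions {
  // the function made available to compiled expressions
  public static double cube(double x) {
    return x * x * x;
  }

  static Map<String, MethodHandle> functions() throws ReflectiveOperationException {
    MethodHandle cube =
        MethodHandles.lookup()
            .findStatic(
                CustomFunctions.class, "cube", MethodType.methodType(double.class, double.class));
    // Pass this map to the JavascriptCompiler.compile overload that accepts MethodHandle values;
    // legacy Method-based maps can be converted with JavascriptCompiler#convertLegacyFunctions.
    return Map.of("cube", cube);
  }
}
```
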

### `Expression#evaluate()` declares to throw IOException (GITHUB#12878)

The expressions module has changed the `Expression#evaluate()` method signature:
it now declares that it may throw `IOException`. This was an oversight because
compiled expressions call `DoubleValues#doubleValue` behind the scenes, which
may throw `IOException` on index problems, bubbling up unexpectedly to the caller.

## Migration from Lucene 9.0 to Lucene 9.1

### Test framework package migration and module (LUCENE-10301)

@ -105,7 +105,8 @@ public class NormalizeCharMap {
|
|||
final FST<CharsRef> map;
|
||||
try {
|
||||
final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
|
||||
final FSTCompiler<CharsRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, outputs);
|
||||
final FSTCompiler<CharsRef> fstCompiler =
|
||||
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE2, outputs).build();
|
||||
final IntsRefBuilder scratch = new IntsRefBuilder();
|
||||
for (Map.Entry<String, String> ent : pendingPairs.entrySet()) {
|
||||
fstCompiler.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue()));
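The hunk above shows the migration applied throughout this commit: the removed `FSTCompiler` constructor becomes `new FSTCompiler.Builder<>(...).build()`. A self-contained sketch of the new construction path; the `suffixRAMLimitMB` call is an optional tuning knob named in the GITHUB#12542 changelog entry above and should be treated as an assumption:

```java
import java.io.IOException;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.CharSequenceOutputs;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;

// Sketch: FSTCompiler is now obtained through its Builder instead of a public constructor.
final class FstBuilderExample {
  static FSTCompiler<CharsRef> newCompiler() {
    Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
    return new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE2, outputs)
        // optional: cap RAM used for suffix sharing (method named in GITHUB#12542 above)
        .suffixRAMLimitMB(16)
        .build();
  }

  static void addPair(FSTCompiler<CharsRef> compiler, String key, String value) throws IOException {
    // keys must be added in sorted order, as in the calling code above
    IntsRefBuilder scratch = new IntsRefBuilder();
    compiler.add(Util.toUTF16(key, scratch), new CharsRef(value));
  }
}
```
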
|
||||
|
|
|
@ -777,7 +777,6 @@ class KStemmer {
|
|||
private int stemLength() {
|
||||
return j + 1;
|
||||
}
|
||||
;
|
||||
|
||||
private boolean endsIn(char[] s) {
|
||||
if (s.length > k) return false;
|
||||
|
|
|
@ -40,7 +40,8 @@ class ConvTable {
|
|||
|
||||
try {
|
||||
Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
|
||||
FSTCompiler<CharsRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, outputs);
|
||||
FSTCompiler<CharsRef> fstCompiler =
|
||||
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE2, outputs).build();
|
||||
IntsRefBuilder scratchInts = new IntsRefBuilder();
|
||||
for (Map.Entry<String, String> entry : mappings.entrySet()) {
|
||||
String key = entry.getKey();
|
||||
|
|
|
@ -50,18 +50,12 @@ import java.util.Set;
|
|||
import java.util.TreeMap;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntryAccumulator;
|
||||
import org.apache.lucene.analysis.hunspell.SortingStrategy.EntrySupplier;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.IntsRefBuilder;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.FSTCompiler;
|
||||
import org.apache.lucene.util.fst.IntSequenceOutputs;
|
||||
|
@ -215,6 +209,25 @@ public class Dictionary {
|
|||
List<InputStream> dictionaries,
|
||||
boolean ignoreCase)
|
||||
throws IOException, ParseException {
|
||||
this(affix, dictionaries, ignoreCase, SortingStrategy.offline(tempDir, tempFileNamePrefix));
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new Dictionary containing the information read from the provided InputStreams to
|
||||
* hunspell affix and dictionary files. You have to close the provided InputStreams yourself.
|
||||
*
|
||||
* @param affix InputStream for reading the hunspell affix file (won't be closed).
|
||||
* @param dictionaries InputStream for reading the hunspell dictionary files (won't be closed).
|
||||
* @param sortingStrategy the entry strategy for the dictionary loading
|
||||
* @throws IOException Can be thrown while reading from the InputStreams
|
||||
* @throws ParseException Can be thrown if the content of the files does not meet expected formats
|
||||
*/
|
||||
public Dictionary(
|
||||
InputStream affix,
|
||||
List<InputStream> dictionaries,
|
||||
boolean ignoreCase,
|
||||
SortingStrategy sortingStrategy)
|
||||
throws IOException, ParseException {
|
||||
this.ignoreCase = ignoreCase;
|
||||
|
||||
try (BufferedInputStream affixStream =
|
||||
|
@ -250,10 +263,11 @@ public class Dictionary {
|
|||
readAffixFile(affixStream, decoder, flagEnumerator);
|
||||
|
||||
// read dictionary entries
|
||||
IndexOutput unsorted = tempDir.createTempOutput(tempFileNamePrefix, "dat", IOContext.DEFAULT);
|
||||
int wordCount = mergeDictionaries(dictionaries, decoder, unsorted);
|
||||
String sortedFile = sortWordsOffline(tempDir, tempFileNamePrefix, unsorted);
|
||||
words = readSortedDictionaries(tempDir, sortedFile, flagEnumerator, wordCount);
|
||||
EntryAccumulator acc = sortingStrategy.start();
|
||||
mergeDictionaries(dictionaries, decoder, acc);
|
||||
try (EntrySupplier sorted = acc.finishAndSort()) {
|
||||
words = readSortedDictionaries(flagEnumerator, sorted);
|
||||
}
|
||||
flagLookup = flagEnumerator.finish();
|
||||
aliases = null; // no longer needed
|
||||
morphAliases = null; // no longer needed
|
||||
|
@ -631,7 +645,8 @@ public class Dictionary {
|
|||
|
||||
private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException {
|
||||
IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
|
||||
FSTCompiler<IntsRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, outputs);
|
||||
FSTCompiler<IntsRef> fstCompiler =
|
||||
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
|
||||
IntsRefBuilder scratch = new IntsRefBuilder();
|
||||
for (Map.Entry<String, List<Integer>> entry : affixes.entrySet()) {
|
||||
Util.toUTF32(entry.getKey(), scratch);
|
||||
|
@ -984,52 +999,43 @@ public class Dictionary {
|
|||
}
|
||||
}
|
||||
|
||||
private int mergeDictionaries(
|
||||
List<InputStream> dictionaries, CharsetDecoder decoder, IndexOutput output)
|
||||
private void mergeDictionaries(
|
||||
List<InputStream> dictionaries, CharsetDecoder decoder, EntryAccumulator acc)
|
||||
throws IOException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int wordCount = 0;
|
||||
try (ByteSequencesWriter writer = new ByteSequencesWriter(output)) {
|
||||
for (InputStream dictionary : dictionaries) {
|
||||
BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
|
||||
lines.readLine(); // first line is number of entries (approximately, sometimes)
|
||||
for (InputStream dictionary : dictionaries) {
|
||||
BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
|
||||
lines.readLine(); // first line is number of entries (approximately, sometimes)
|
||||
|
||||
String line;
|
||||
while ((line = lines.readLine()) != null) {
|
||||
// wild and unpredictable code comment rules
|
||||
if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '\t') {
|
||||
continue;
|
||||
}
|
||||
line = unescapeEntry(line);
|
||||
// if we haven't seen any custom morphological data, try to parse one
|
||||
if (!hasCustomMorphData) {
|
||||
int morphStart = line.indexOf(MORPH_SEPARATOR);
|
||||
if (morphStart >= 0) {
|
||||
String data = line.substring(morphStart + 1);
|
||||
hasCustomMorphData =
|
||||
splitMorphData(data).stream().anyMatch(s -> !s.startsWith("ph:"));
|
||||
}
|
||||
}
|
||||
|
||||
wordCount += writeNormalizedWordEntry(sb, writer, line);
|
||||
String line;
|
||||
while ((line = lines.readLine()) != null) {
|
||||
// wild and unpredictable code comment rules
|
||||
if (line.isEmpty() || line.charAt(0) == '#' || line.charAt(0) == '\t') {
|
||||
continue;
|
||||
}
|
||||
line = unescapeEntry(line);
|
||||
// if we haven't seen any custom morphological data, try to parse one
|
||||
if (!hasCustomMorphData) {
|
||||
int morphStart = line.indexOf(MORPH_SEPARATOR);
|
||||
if (morphStart >= 0) {
|
||||
String data = line.substring(morphStart + 1);
|
||||
hasCustomMorphData = splitMorphData(data).stream().anyMatch(s -> !s.startsWith("ph:"));
|
||||
}
|
||||
}
|
||||
|
||||
writeNormalizedWordEntry(sb, line, acc);
|
||||
}
|
||||
CodecUtil.writeFooter(output);
|
||||
}
|
||||
return wordCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of word entries written
|
||||
*/
|
||||
private int writeNormalizedWordEntry(StringBuilder reuse, ByteSequencesWriter writer, String line)
|
||||
private void writeNormalizedWordEntry(StringBuilder reuse, String line, EntryAccumulator acc)
|
||||
throws IOException {
|
||||
int flagSep = line.indexOf(FLAG_SEPARATOR);
|
||||
int morphSep = line.indexOf(MORPH_SEPARATOR);
|
||||
assert morphSep > 0;
|
||||
assert morphSep > flagSep;
|
||||
int sep = flagSep < 0 ? morphSep : flagSep;
|
||||
if (sep == 0) return 0;
|
||||
if (sep == 0) return;
|
||||
|
||||
CharSequence toWrite;
|
||||
String beforeSep = line.substring(0, sep);
|
||||
|
@ -1043,19 +1049,16 @@ public class Dictionary {
|
|||
|
||||
String written = toWrite.toString();
|
||||
sep = written.length() - (line.length() - sep);
|
||||
writer.write(written.getBytes(StandardCharsets.UTF_8));
|
||||
acc.addEntry(written);
|
||||
|
||||
WordCase wordCase = WordCase.caseOf(written, sep);
|
||||
if (wordCase == WordCase.MIXED || wordCase == WordCase.UPPER && flagSep > 0) {
|
||||
addHiddenCapitalizedWord(reuse, writer, written.substring(0, sep), written.substring(sep));
|
||||
return 2;
|
||||
addHiddenCapitalizedWord(reuse, acc, written.substring(0, sep), written.substring(sep));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
private void addHiddenCapitalizedWord(
|
||||
StringBuilder reuse, ByteSequencesWriter writer, String word, String afterSep)
|
||||
throws IOException {
|
||||
StringBuilder reuse, EntryAccumulator acc, String word, String afterSep) throws IOException {
|
||||
reuse.setLength(0);
|
||||
reuse.append(Character.toUpperCase(word.charAt(0)));
|
||||
for (int i = 1; i < word.length(); i++) {
|
||||
|
@ -1064,7 +1067,7 @@ public class Dictionary {
|
|||
reuse.append(FLAG_SEPARATOR);
|
||||
reuse.append(HIDDEN_FLAG);
|
||||
reuse.append(afterSep, afterSep.charAt(0) == FLAG_SEPARATOR ? 1 : 0, afterSep.length());
|
||||
writer.write(reuse.toString().getBytes(StandardCharsets.UTF_8));
|
||||
acc.addEntry(reuse.toString());
|
||||
}
|
||||
|
||||
String toLowerCase(String word) {
|
||||
|
@ -1084,137 +1087,66 @@ public class Dictionary {
|
|||
return new String(chars);
|
||||
}
|
||||
|
||||
private String sortWordsOffline(
|
||||
Directory tempDir, String tempFileNamePrefix, IndexOutput unsorted) throws IOException {
|
||||
OfflineSorter sorter =
|
||||
new OfflineSorter(
|
||||
tempDir,
|
||||
tempFileNamePrefix,
|
||||
new Comparator<>() {
|
||||
final BytesRef scratch1 = new BytesRef();
|
||||
final BytesRef scratch2 = new BytesRef();
|
||||
|
||||
private void initScratch(BytesRef o, BytesRef scratch) {
|
||||
scratch.bytes = o.bytes;
|
||||
scratch.offset = o.offset;
|
||||
scratch.length = o.length;
|
||||
|
||||
for (int i = scratch.length - 1; i >= 0; i--) {
|
||||
if (scratch.bytes[scratch.offset + i] == FLAG_SEPARATOR
|
||||
|| scratch.bytes[scratch.offset + i] == MORPH_SEPARATOR) {
|
||||
scratch.length = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compare(BytesRef o1, BytesRef o2) {
|
||||
initScratch(o1, scratch1);
|
||||
initScratch(o2, scratch2);
|
||||
|
||||
int cmp = scratch1.compareTo(scratch2);
|
||||
if (cmp == 0) {
|
||||
// tie break on whole row
|
||||
return o1.compareTo(o2);
|
||||
} else {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
String sorted;
|
||||
boolean success = false;
|
||||
try {
|
||||
sorted = sorter.sort(unsorted.getName());
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
tempDir.deleteFile(unsorted.getName());
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, unsorted.getName());
|
||||
}
|
||||
}
|
||||
return sorted;
|
||||
}
|
||||
|
||||
private WordStorage readSortedDictionaries(
|
||||
Directory tempDir, String sorted, FlagEnumerator flags, int wordCount) throws IOException {
|
||||
boolean success = false;
|
||||
|
||||
private WordStorage readSortedDictionaries(FlagEnumerator flags, EntrySupplier sorted)
|
||||
throws IOException {
|
||||
Map<String, Integer> morphIndices = new HashMap<>();
|
||||
|
||||
WordStorage.Builder builder =
|
||||
new WordStorage.Builder(
|
||||
wordCount, hashFactor(), hasCustomMorphData, flags, allNonSuggestibleFlags());
|
||||
sorted.wordCount(), hashFactor(), hasCustomMorphData, flags, allNonSuggestibleFlags());
|
||||
|
||||
try (ByteSequencesReader reader =
|
||||
new ByteSequencesReader(tempDir.openChecksumInput(sorted), sorted)) {
|
||||
// TODO: the flags themselves can be double-chars (long) or also numeric
|
||||
// either way the trick is to encode them as char... but they must be parsed differently
|
||||
|
||||
// TODO: the flags themselves can be double-chars (long) or also numeric
|
||||
// either way the trick is to encode them as char... but they must be parsed differently
|
||||
while (true) {
|
||||
String line = sorted.next();
|
||||
if (line == null) break;
|
||||
|
||||
while (true) {
|
||||
BytesRef scratch = reader.next();
|
||||
if (scratch == null) {
|
||||
break;
|
||||
}
|
||||
String entry;
|
||||
char[] wordForm;
|
||||
int end;
|
||||
|
||||
String line = scratch.utf8ToString();
|
||||
String entry;
|
||||
char[] wordForm;
|
||||
int end;
|
||||
|
||||
int flagSep = line.indexOf(FLAG_SEPARATOR);
|
||||
if (flagSep == -1) {
|
||||
wordForm = NOFLAGS;
|
||||
end = line.indexOf(MORPH_SEPARATOR);
|
||||
entry = line.substring(0, end);
|
||||
} else {
|
||||
end = line.indexOf(MORPH_SEPARATOR);
|
||||
boolean hidden = line.charAt(flagSep + 1) == HIDDEN_FLAG;
|
||||
String flagPart = line.substring(flagSep + (hidden ? 2 : 1), end).strip();
|
||||
if (aliasCount > 0 && !flagPart.isEmpty()) {
|
||||
flagPart = getAliasValue(Integer.parseInt(flagPart));
|
||||
}
|
||||
|
||||
wordForm = flagParsingStrategy.parseFlags(flagPart);
|
||||
if (hidden) {
|
||||
wordForm = ArrayUtil.growExact(wordForm, wordForm.length + 1);
|
||||
wordForm[wordForm.length - 1] = HIDDEN_FLAG;
|
||||
}
|
||||
entry = line.substring(0, flagSep);
|
||||
}
|
||||
|
||||
if (entry.isEmpty()) continue;
|
||||
|
||||
int morphDataID = 0;
|
||||
if (end + 1 < line.length()) {
|
||||
List<String> morphFields = readMorphFields(entry, line.substring(end + 1));
|
||||
if (!morphFields.isEmpty()) {
|
||||
morphFields.sort(Comparator.naturalOrder());
|
||||
morphDataID = addMorphFields(morphIndices, String.join(" ", morphFields));
|
||||
}
|
||||
}
|
||||
|
||||
builder.add(entry, wordForm, morphDataID);
|
||||
}
|
||||
|
||||
// finalize last entry
|
||||
success = true;
|
||||
return new WordStorage(builder) {
|
||||
@Override
|
||||
char caseFold(char c) {
|
||||
return Dictionary.this.caseFold(c);
|
||||
}
|
||||
};
|
||||
} finally {
|
||||
if (success) {
|
||||
tempDir.deleteFile(sorted);
|
||||
int flagSep = line.indexOf(FLAG_SEPARATOR);
|
||||
if (flagSep == -1) {
|
||||
wordForm = NOFLAGS;
|
||||
end = line.indexOf(MORPH_SEPARATOR);
|
||||
entry = line.substring(0, end);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, sorted);
|
||||
end = line.indexOf(MORPH_SEPARATOR);
|
||||
boolean hidden = line.charAt(flagSep + 1) == HIDDEN_FLAG;
|
||||
String flagPart = line.substring(flagSep + (hidden ? 2 : 1), end).strip();
|
||||
if (aliasCount > 0 && !flagPart.isEmpty()) {
|
||||
flagPart = getAliasValue(Integer.parseInt(flagPart));
|
||||
}
|
||||
|
||||
wordForm = flagParsingStrategy.parseFlags(flagPart);
|
||||
if (hidden) {
|
||||
wordForm = ArrayUtil.growExact(wordForm, wordForm.length + 1);
|
||||
wordForm[wordForm.length - 1] = HIDDEN_FLAG;
|
||||
}
|
||||
entry = line.substring(0, flagSep);
|
||||
}
|
||||
|
||||
if (entry.isEmpty()) continue;
|
||||
|
||||
int morphDataID = 0;
|
||||
if (end + 1 < line.length()) {
|
||||
List<String> morphFields = readMorphFields(entry, line.substring(end + 1));
|
||||
if (!morphFields.isEmpty()) {
|
||||
morphFields.sort(Comparator.naturalOrder());
|
||||
morphDataID = addMorphFields(morphIndices, String.join(" ", morphFields));
|
||||
}
|
||||
}
|
||||
|
||||
builder.add(entry, wordForm, morphDataID);
|
||||
}
|
||||
|
||||
return new WordStorage(builder) {
|
||||
@Override
|
||||
char caseFold(char c) {
|
||||
return Dictionary.this.caseFold(c);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,181 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.hunspell;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefComparator;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.OfflineSorter;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesReader;
|
||||
import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
|
||||
|
||||
/**
|
||||
* The strategy defining how a Hunspell dictionary should be loaded, with different tradeoffs. The
|
||||
* entries should be sorted in a special way, and this can be done either in-memory (faster, but
|
||||
* temporarily allocating more memory) or using disk (slower, but not needing much memory).
|
||||
*
|
||||
* @see #offline(Directory, String)
|
||||
* @see #inMemory()
|
||||
*/
|
||||
public abstract class SortingStrategy {
|
||||
|
||||
abstract EntryAccumulator start() throws IOException;
|
||||
|
||||
interface EntryAccumulator {
|
||||
|
||||
void addEntry(String entry) throws IOException;
|
||||
|
||||
EntrySupplier finishAndSort() throws IOException;
|
||||
}
|
||||
|
||||
interface EntrySupplier extends Closeable {
|
||||
int wordCount();
|
||||
|
||||
/** The next line or {@code null} if the end is reached */
|
||||
String next() throws IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
* An "offline" strategy that creates temporary files in the given directory and uses them for
|
||||
* sorting with {@link OfflineSorter}. It's slower than {@link #inMemory()}, but doesn't need to
|
||||
* load the entire dictionary into memory.
|
||||
*/
|
||||
public static SortingStrategy offline(Directory tempDir, String tempFileNamePrefix) {
|
||||
return new SortingStrategy() {
|
||||
@Override
|
||||
EntryAccumulator start() throws IOException {
|
||||
IndexOutput output = tempDir.createTempOutput(tempFileNamePrefix, "dat", IOContext.DEFAULT);
|
||||
ByteSequencesWriter writer = new ByteSequencesWriter(output);
|
||||
return new EntryAccumulator() {
|
||||
int wordCount = 0;
|
||||
|
||||
@Override
|
||||
public void addEntry(String entry) throws IOException {
|
||||
wordCount++;
|
||||
writer.write(entry.getBytes(StandardCharsets.UTF_8));
|
||||
}
|
||||
|
||||
@Override
|
||||
public EntrySupplier finishAndSort() throws IOException {
|
||||
CodecUtil.writeFooter(output);
|
||||
writer.close();
|
||||
String sortedFile = sortWordsOffline();
|
||||
ByteSequencesReader reader =
|
||||
new ByteSequencesReader(tempDir.openChecksumInput(sortedFile), sortedFile);
|
||||
return new EntrySupplier() {
|
||||
boolean success = false;
|
||||
|
||||
@Override
|
||||
public int wordCount() {
|
||||
return wordCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() throws IOException {
|
||||
BytesRef scratch = reader.next();
|
||||
if (scratch == null) {
|
||||
success = true;
|
||||
return null;
|
||||
}
|
||||
return scratch.utf8ToString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
if (success) {
|
||||
tempDir.deleteFile(sortedFile);
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, sortedFile);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private String sortWordsOffline() throws IOException {
|
||||
var sorter = new OfflineSorter(tempDir, tempFileNamePrefix, BytesRefComparator.NATURAL);
|
||||
|
||||
String sorted;
|
||||
boolean success = false;
|
||||
try {
|
||||
sorted = sorter.sort(output.getName());
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
tempDir.deleteFile(output.getName());
|
||||
} else {
|
||||
IOUtils.deleteFilesIgnoringExceptions(tempDir, output.getName());
|
||||
}
|
||||
}
|
||||
return sorted;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* The strategy that loads all entries as {@link String} objects and sorts them in memory. The
|
||||
* entries are then stored in a more compressed way, and the strings are gc-ed, but the loading
|
||||
* itself needs {@code O(dictionary_size)} memory.
|
||||
*/
|
||||
public static SortingStrategy inMemory() {
|
||||
return new SortingStrategy() {
|
||||
@Override
|
||||
EntryAccumulator start() {
|
||||
List<String> entries = new ArrayList<>();
|
||||
return new EntryAccumulator() {
|
||||
@Override
|
||||
public void addEntry(String entry) {
|
||||
entries.add(entry);
|
||||
}
|
||||
|
||||
@Override
|
||||
public EntrySupplier finishAndSort() {
|
||||
entries.sort(Comparator.naturalOrder());
|
||||
return new EntrySupplier() {
|
||||
int i = 0;
|
||||
|
||||
@Override
|
||||
public int wordCount() {
|
||||
return entries.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String next() {
|
||||
return i < entries.size() ? entries.get(i++) : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
}
|
||||
}
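For context, a minimal sketch of choosing one of these strategies through the new `Dictionary` constructor shown earlier in this diff; the file paths are placeholders:

```java
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.util.List;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.SortingStrategy;

// Sketch: load a Hunspell dictionary, sorting entries in memory rather than via temp files.
final class LoadDictionary {
  static Dictionary load() throws IOException, ParseException {
    try (InputStream affix = Files.newInputStream(Path.of("en_US.aff"));
        InputStream dic = Files.newInputStream(Path.of("en_US.dic"))) {
      // SortingStrategy.offline(directory, prefix) trades speed for lower memory use
      return new Dictionary(affix, List.of(dic), false, SortingStrategy.inMemory());
    }
  }
}
```
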
|
|
@ -350,16 +350,19 @@ abstract class WordStorage {
|
|||
|
||||
currentOrds.clear();
|
||||
boolean hasNonHidden = false;
|
||||
boolean isSuggestible = false;
|
||||
for (char[] flags : group) {
|
||||
if (!hasFlag(flags, Dictionary.HIDDEN_FLAG)) {
|
||||
hasNonHidden = true;
|
||||
break;
|
||||
}
|
||||
if (!hasNoSuggestFlag(flags)) {
|
||||
isSuggestible = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < group.size(); i++) {
|
||||
char[] flags = group.get(i);
|
||||
if (hasNonHidden && hasFlag(flags, Dictionary.HIDDEN_FLAG)) {
|
||||
if (hasNonHidden && group.size() > 1 && hasFlag(flags, Dictionary.HIDDEN_FLAG)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -388,7 +391,7 @@ abstract class WordStorage {
|
|||
|
||||
int mask =
|
||||
(prevCode == 0 ? 0 : COLLISION_MASK)
|
||||
| (group.stream().anyMatch(flags -> !hasNoSuggestFlag(flags)) ? SUGGESTIBLE_MASK : 0)
|
||||
| (isSuggestible ? SUGGESTIBLE_MASK : 0)
|
||||
| Math.min(currentEntry.length(), MAX_STORED_LENGTH);
|
||||
hashTable[hash] = (mask << OFFSET_BITS) | pos;
|
||||
|
||||
|
|
|
@ -210,7 +210,8 @@ public final class StemmerOverrideFilter extends TokenFilter {
|
|||
*/
|
||||
public StemmerOverrideMap build() throws IOException {
|
||||
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||
FSTCompiler<BytesRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, outputs);
|
||||
FSTCompiler<BytesRef> fstCompiler =
|
||||
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
|
||||
final int[] sort = hash.sort();
|
||||
IntsRefBuilder intsSpare = new IntsRefBuilder();
|
||||
final int size = hash.size();
|
||||
|
|
|
@ -46,11 +46,11 @@ public class TruncateTokenFilterFactory extends TokenFilterFactory {
|
|||
public static final String NAME = "truncate";
|
||||
|
||||
public static final String PREFIX_LENGTH_KEY = "prefixLength";
|
||||
private final byte prefixLength;
|
||||
private final int prefixLength;
|
||||
|
||||
public TruncateTokenFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
prefixLength = Byte.parseByte(get(args, PREFIX_LENGTH_KEY, "5"));
|
||||
prefixLength = Integer.parseInt(get(args, PREFIX_LENGTH_KEY, "5"));
|
||||
if (prefixLength < 1)
|
||||
throw new IllegalArgumentException(
|
||||
PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLength);
|
||||
|
|
|
@ -163,7 +163,6 @@ public final class WordDelimiterFilter extends TokenFilter {
|
|||
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final KeywordAttribute keywordAttribute = addAttribute(KeywordAttribute.class);
|
||||
;
|
||||
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAttribute =
|
||||
addAttribute(PositionIncrementAttribute.class);
|
||||
|
|
|
@ -164,7 +164,6 @@ public final class WordDelimiterGraphFilter extends TokenFilter {
|
|||
|
||||
private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
|
||||
private final KeywordAttribute keywordAttribute = addAttribute(KeywordAttribute.class);
|
||||
;
|
||||
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posIncAttribute =
|
||||
addAttribute(PositionIncrementAttribute.class);
|
||||
|
|
|
@ -45,7 +45,7 @@ import org.apache.lucene.util.ResourceLoaderAware;
|
|||
* preserveOriginal="0" splitOnNumerics="1" splitOnCaseChange="1"
|
||||
* catenateWords="0" catenateNumbers="0" catenateAll="0"
|
||||
* generateWordParts="1" generateNumberParts="1" stemEnglishPossessive="1"
|
||||
* types="wdfftypes.txt" />
|
||||
* types="wdfftypes.txt" ignoreKeywords="0" />
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*
|
||||
|
@ -100,6 +100,9 @@ public class WordDelimiterGraphFilterFactory extends TokenFilterFactory
|
|||
if (getInt(args, "stemEnglishPossessive", 1) != 0) {
|
||||
flags |= STEM_ENGLISH_POSSESSIVE;
|
||||
}
|
||||
if (getInt(args, "ignoreKeywords", 0) != 0) {
|
||||
flags |= IGNORE_KEYWORDS;
|
||||
}
|
||||
wordFiles = get(args, PROTECTED_TOKENS);
|
||||
types = get(args, TYPES);
|
||||
this.flags = flags;
|
||||
|
|
|
@ -216,7 +216,6 @@ public final class SynonymFilter extends TokenFilter {
|
|||
count++;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
private final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
|
||||
|
||||
|
|
|
@ -222,7 +222,8 @@ public class SynonymMap {
|
|||
public SynonymMap build() throws IOException {
|
||||
ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
|
||||
// TODO: are we using the best sharing options?
|
||||
FSTCompiler<BytesRef> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE4, outputs);
|
||||
FSTCompiler<BytesRef> fstCompiler =
|
||||
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
|
||||
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
ByteArrayDataOutput scratchOutput = new ByteArrayDataOutput();
|
||||
|
|
|
@ -595,8 +595,7 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
|
|||
}
|
||||
}
|
||||
Reader reader = new HTMLStripCharFilter(new StringReader(text.toString()));
|
||||
while (reader.read() != -1)
|
||||
;
|
||||
while (reader.read() != -1) {}
|
||||
}
|
||||
|
||||
public void testUTF16Surrogates() throws Exception {
|
||||
|
|
|
@ -230,7 +230,6 @@ public class TestDuelingAnalyzers extends BaseTokenStreamTestCase {
|
|||
assertEquals(
|
||||
"wrong end offset for input: " + s, leftOffset.endOffset(), rightOffset.endOffset());
|
||||
}
|
||||
;
|
||||
assertFalse("wrong number of tokens for input: " + s, right.incrementToken());
|
||||
left.end();
|
||||
right.end();
|
||||
|
|
|
@ -41,7 +41,6 @@ import java.util.concurrent.atomic.AtomicLong;
|
|||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.lucene.tests.store.BaseDirectoryWrapper;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase.SuppressSysoutChecks;
|
||||
import org.apache.lucene.tests.util.RamUsageTester;
|
||||
|
@ -72,9 +71,8 @@ public class TestAllDictionaries extends LuceneTestCase {
|
|||
Path dic = Path.of(affPath.substring(0, affPath.length() - 4) + ".dic");
|
||||
assert Files.exists(dic) : dic;
|
||||
try (InputStream dictionary = Files.newInputStream(dic);
|
||||
InputStream affix = Files.newInputStream(aff);
|
||||
BaseDirectoryWrapper tempDir = newDirectory()) {
|
||||
return new Dictionary(tempDir, "dictionary", affix, dictionary) {
|
||||
InputStream affix = Files.newInputStream(aff)) {
|
||||
return new Dictionary(affix, List.of(dictionary), false, SortingStrategy.inMemory()) {
|
||||
@Override
|
||||
protected boolean tolerateAffixRuleCountMismatches() {
|
||||
return true;
|
||||
|
|
|
@ -256,15 +256,22 @@ public class TestSpellChecking extends LuceneTestCase {
|
|||
}
|
||||
|
||||
static void checkSpellCheckerExpectations(Path basePath) throws IOException, ParseException {
|
||||
InputStream affixStream = Files.newInputStream(Path.of(basePath.toString() + ".aff"));
|
||||
checkSpellCheckerExpectations(
|
||||
basePath, SortingStrategy.offline(new ByteBuffersDirectory(), "dictionary"));
|
||||
checkSpellCheckerExpectations(basePath, SortingStrategy.inMemory());
|
||||
}
|
||||
|
||||
private static void checkSpellCheckerExpectations(Path basePath, SortingStrategy strategy)
|
||||
throws IOException, ParseException {
|
||||
Path affFile = Path.of(basePath + ".aff");
|
||||
Path dicFile = Path.of(basePath + ".dic");
|
||||
InputStream affixStream = Files.newInputStream(affFile);
|
||||
InputStream dictStream = Files.newInputStream(dicFile);
|
||||
|
||||
Hunspell speller;
|
||||
Map<String, Suggester> suggesters = new LinkedHashMap<>();
|
||||
try {
|
||||
Dictionary dictionary =
|
||||
new Dictionary(new ByteBuffersDirectory(), "dictionary", affixStream, dictStream);
|
||||
Dictionary dictionary = new Dictionary(affixStream, List.of(dictStream), false, strategy);
|
||||
speller = new Hunspell(dictionary, TimeoutPolicy.NO_TIMEOUT, () -> {});
|
||||
Suggester suggester = new Suggester(dictionary);
|
||||
suggesters.put("default", suggester);
|
||||
|
|
|
@ -41,7 +41,6 @@ public class TestIndicNormalizer extends BaseTokenStreamTestCase {
|
|||
|
||||
private void check(String input, String output) throws IOException {
|
||||
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
;
|
||||
tokenizer.setReader(new StringReader(input));
|
||||
TokenFilter tf = new IndicNormalizationFilter(tokenizer);
|
||||
assertTokenStreamContents(tf, new String[] {output});
|
||||
|
|
|
@ -89,7 +89,6 @@ public class TestKeywordMarkerFilterFactory extends BaseTokenStreamFactoryTestCa
|
|||
stream =
|
||||
tokenFilterFactory("KeywordMarker", "pattern", "Cats", "ignoreCase", "true").create(stream);
|
||||
stream = tokenFilterFactory("PorterStem").create(stream);
|
||||
;
|
||||
assertTokenStreamContents(stream, new String[] {"dog", "cats", "Cats"});
|
||||
}
|
||||
|
||||
|
|
|
@ -68,4 +68,23 @@ public class TestTruncateTokenFilterFactory extends BaseTokenStreamFactoryTestCa
|
|||
TruncateTokenFilterFactory.PREFIX_LENGTH_KEY
|
||||
+ " parameter must be a positive number: -5"));
|
||||
}
|
||||
|
||||
/** Test that a prefix length greater than the byte limit (127) is accepted */
|
||||
public void testLengthGreaterThanByteLimitArgument() throws Exception {
|
||||
Reader reader =
|
||||
new StringReader(
|
||||
"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvw128characters From here");
|
||||
TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||
((Tokenizer) stream).setReader(reader);
|
||||
stream =
|
||||
tokenFilterFactory("Truncate", TruncateTokenFilterFactory.PREFIX_LENGTH_KEY, "128")
|
||||
.create(stream);
|
||||
assertTokenStreamContents(
|
||||
stream,
|
||||
new String[] {
|
||||
"abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvw1",
|
||||
"From",
|
||||
"here"
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -69,7 +69,6 @@ public class TestEdgeNGramTokenizer extends BaseTokenStreamTestCase {
public void testOversizedNgrams() throws Exception {
EdgeNGramTokenizer tokenizer = new EdgeNGramTokenizer(6, 6);
tokenizer.setReader(input);
;
assertTokenStreamContents(tokenizer, new String[0], new int[0], new int[0], 5 /* abcde */);
}
@@ -156,7 +156,6 @@ public class TestCharArrayIterator extends LuceneTestCase {

private void consume(BreakIterator bi, CharacterIterator ci) {
bi.setText(ci);
while (bi.next() != BreakIterator.DONE)
;
while (bi.next() != BreakIterator.DONE) {}
}
}
@@ -16,6 +16,8 @@
*/
package org.apache.lucene.analysis.ja.dict;

import static org.apache.lucene.util.fst.FST.readMetadata;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -103,7 +105,7 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
FST<Long> fst;
try (InputStream is = new BufferedInputStream(fstResource.get())) {
DataInput in = new InputStreamDataInput(is);
fst = new FST<>(in, in, PositiveIntOutputs.getSingleton());
fst = new FST<>(readMetadata(in, PositiveIntOutputs.getSingleton()), in);
}
// TODO: some way to configure?
this.fst = new TokenInfoFST(fst, true);
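Most of the FST-loading hunks in this commit follow the same two-step pattern: read the FST metadata explicitly, then pass it to the FST constructor together with the remaining data. A hedged sketch of that pattern, assuming the stream contains an FST<Long> serialized with PositiveIntOutputs:

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;

public final class FstLoadSketch {
  // Replaces the old one-shot "new FST<>(in, in, outputs)" call sites shown above.
  static FST<Long> load(InputStream resource) throws IOException {
    try (InputStream is = new BufferedInputStream(resource)) {
      DataInput in = new InputStreamDataInput(is);
      // Metadata (header, input type, start node, ...) is read first, then the FST body.
      FST.FSTMetadata<Long> meta = FST.readMetadata(in, PositiveIntOutputs.getSingleton());
      return new FST<>(meta, in);
    }
  }
}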
@@ -101,7 +101,8 @@ class TokenInfoDictionaryBuilder {
lines.sort(Comparator.comparing(entry -> entry[0]));

PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, fstOutput);
FSTCompiler<Long> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput).build();
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = -1; // first ord will be 0
String lastValue = null;
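The builder hunks above (and the similar ones later in this commit) swap direct FSTCompiler construction for FSTCompiler.Builder. A minimal sketch of compiling a small term-to-ordinal FST with that builder; the input data is made up, and compile() is assumed to return the finished FST in this version:

import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;

public final class FstBuildSketch {
  static FST<Long> build() throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    FSTCompiler<Long> compiler =
        new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
    IntsRefBuilder scratch = new IntsRefBuilder();
    String[] sortedTerms = {"bar", "baz", "foo"}; // terms must be added in sorted order
    for (int ord = 0; ord < sortedTerms.length; ord++) {
      compiler.add(Util.toIntsRef(new BytesRef(sortedTerms[ord]), scratch), (long) ord);
    }
    return compiler.compile();
  }
}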
@@ -93,7 +93,8 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
List<int[]> segmentations = new ArrayList<>(featureEntries.size());

PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, fstOutput);
FSTCompiler<Long> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput).build();
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = 0;
@ -758,8 +758,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
|||
for (int i = 0; i < numIterations; i++) {
|
||||
try (TokenStream ts = analyzer.tokenStream("ignored", line)) {
|
||||
ts.reset();
|
||||
while (ts.incrementToken())
|
||||
;
|
||||
while (ts.incrementToken()) {}
|
||||
ts.end();
|
||||
}
|
||||
}
|
||||
|
@ -775,8 +774,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
|||
for (String sentence : sentences) {
|
||||
try (TokenStream ts = analyzer.tokenStream("ignored", sentence)) {
|
||||
ts.reset();
|
||||
while (ts.incrementToken())
|
||||
;
|
||||
while (ts.incrementToken()) {}
|
||||
ts.end();
|
||||
}
|
||||
}
|
||||
|
@ -831,8 +829,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
|||
new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.NORMAL);
|
||||
tokenizer.setReader(new StringReader(doc));
|
||||
tokenizer.reset();
|
||||
while (tokenizer.incrementToken())
|
||||
;
|
||||
while (tokenizer.incrementToken()) {}
|
||||
}
|
||||
|
||||
public void testPatchedSystemDict() throws Exception {
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.lucene.analysis.ko.dict;
|
||||
|
||||
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
@ -102,7 +104,7 @@ public final class TokenInfoDictionary extends BinaryDictionary<TokenInfoMorphDa
|
|||
FST<Long> fst;
|
||||
try (InputStream is = new BufferedInputStream(fstResource.get())) {
|
||||
DataInput in = new InputStreamDataInput(is);
|
||||
fst = new FST<>(in, in, PositiveIntOutputs.getSingleton());
|
||||
fst = new FST<>(readMetadata(in, PositiveIntOutputs.getSingleton()), in);
|
||||
}
|
||||
this.fst = new TokenInfoFST(fst);
|
||||
}
|
||||
|
|
|
@ -94,7 +94,8 @@ class TokenInfoDictionaryBuilder {
|
|||
lines.sort(Comparator.comparing(left -> left[0]));
|
||||
|
||||
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
|
||||
FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, fstOutput);
|
||||
FSTCompiler<Long> fstCompiler =
|
||||
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput).build();
|
||||
IntsRefBuilder scratch = new IntsRefBuilder();
|
||||
long ord = -1; // first ord will be 0
|
||||
String lastValue = null;
|
||||
|
|
|
@ -75,7 +75,8 @@ public final class UserDictionary implements Dictionary<UserMorphData> {
|
|||
entries.sort(Comparator.comparing(e -> e.split("\\s+")[0]));
|
||||
|
||||
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
|
||||
FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE2, fstOutput);
|
||||
FSTCompiler<Long> fstCompiler =
|
||||
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput).build();
|
||||
IntsRefBuilder scratch = new IntsRefBuilder();
|
||||
|
||||
String lastToken = null;
|
||||
|
|
|
@@ -41,7 +41,6 @@ public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
/** test use of exclusion set */
public void testExclude() throws IOException {
CharArraySet exclusionSet = new CharArraySet(asSet("studenta"), false);
;
Analyzer a = new PolishAnalyzer(PolishAnalyzer.getDefaultStopSet(), exclusionSet);
checkOneTerm(a, "studenta", "studenta");
checkOneTerm(a, "studenci", "student");
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.lucene.backward_codecs.lucene40.blocktree;
|
||||
|
||||
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
|
@ -89,9 +91,17 @@ public final class FieldReader extends Terms {
|
|||
final IndexInput clone = indexIn.clone();
|
||||
clone.seek(indexStartFP);
|
||||
if (metaIn == indexIn) { // Only true before Lucene 8.6
|
||||
index = new FST<>(clone, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
|
||||
index =
|
||||
new FST<>(
|
||||
readMetadata(clone, ByteSequenceOutputs.getSingleton()),
|
||||
clone,
|
||||
new OffHeapFSTStore());
|
||||
} else {
|
||||
index = new FST<>(metaIn, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
|
||||
index =
|
||||
new FST<>(
|
||||
readMetadata(metaIn, ByteSequenceOutputs.getSingleton()),
|
||||
clone,
|
||||
new OffHeapFSTStore());
|
||||
}
|
||||
/*
|
||||
if (false) {
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.nio.file.Path;
|
|||
import java.nio.file.Paths;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.IntPoint;
|
||||
import org.apache.lucene.index.CheckIndex;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -70,7 +71,7 @@ public class TestManyPointsInOldIndex extends LuceneTestCase {
|
|||
dir.setCheckIndexOnClose(false);
|
||||
|
||||
// ... because we check ourselves here:
|
||||
TestUtil.checkIndex(dir, false, true, true, null);
|
||||
TestUtil.checkIndex(dir, CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, true, true, null);
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ description = 'Lucene JMH micro-benchmarking module'
|
|||
|
||||
dependencies {
|
||||
moduleImplementation project(':lucene:core')
|
||||
moduleImplementation project(':lucene:expressions')
|
||||
|
||||
moduleImplementation "org.openjdk.jmh:jmh-core:1.37"
|
||||
annotationProcessor "org.openjdk.jmh:jmh-generator-annprocess:1.37"
|
||||
|
@ -42,7 +43,7 @@ tasks.matching { it.name == "forbiddenApisMain" }.configureEach {
|
|||
tasks.matching { it.name in [
|
||||
// Turn off JMH dependency checksums and licensing (it's GPL w/ classpath exception
|
||||
// but this seems fine for test/build only tools).
|
||||
"validateJarChecksums", "validateJarLicenses",
|
||||
"validateJarChecksums", "validateJarLicenses", "collectJarInfos",
|
||||
// No special javadocs for JMH benchmarks.
|
||||
"renderSiteJavadoc",
|
||||
"renderJavadoc",
|
||||
|
|
|
@ -20,6 +20,7 @@ module org.apache.lucene.benchmark.jmh {
|
|||
requires jmh.core;
|
||||
requires jdk.unsupported;
|
||||
requires org.apache.lucene.core;
|
||||
requires org.apache.lucene.expressions;
|
||||
|
||||
exports org.apache.lucene.benchmark.jmh;
|
||||
exports org.apache.lucene.benchmark.jmh.jmh_generated;
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.benchmark.jmh;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandle;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.invoke.MethodType;
|
||||
import java.text.ParseException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.apache.lucene.expressions.Expression;
|
||||
import org.apache.lucene.expressions.js.JavascriptCompiler;
|
||||
import org.apache.lucene.search.DoubleValues;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Benchmark)
|
||||
@Warmup(iterations = 5, time = 5)
|
||||
@Measurement(iterations = 12, time = 8)
|
||||
@Fork(value = 1)
|
||||
public class ExpressionsBenchmark {
|
||||
|
||||
/**
|
||||
* Some extra functions to bench "identity" in various variants, another one is named
|
||||
* "native_identity" (see below).
|
||||
*/
|
||||
private static final Map<String, MethodHandle> FUNCTIONS = getFunctions();
|
||||
|
||||
private static final String NATIVE_IDENTITY_NAME = "native_identity";
|
||||
|
||||
private static Map<String, MethodHandle> getFunctions() {
|
||||
try {
|
||||
var lookup = MethodHandles.lookup();
|
||||
Map<String, MethodHandle> m = new HashMap<>(JavascriptCompiler.DEFAULT_FUNCTIONS);
|
||||
m.put(
|
||||
"func_identity",
|
||||
lookup.findStatic(
|
||||
lookup.lookupClass(), "ident", MethodType.methodType(double.class, double.class)));
|
||||
m.put("mh_identity", MethodHandles.identity(double.class));
|
||||
return m;
|
||||
} catch (ReflectiveOperationException e) {
|
||||
throw new AssertionError(e);
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private static double ident(double v) {
|
||||
return v;
|
||||
}
|
||||
|
||||
/** A native implementation of an expression to compare performance */
|
||||
private static final Expression NATIVE_IDENTITY_EXPRESSION =
|
||||
new Expression(NATIVE_IDENTITY_NAME, new String[] {"x"}) {
|
||||
@Override
|
||||
public double evaluate(DoubleValues[] functionValues) throws IOException {
|
||||
return functionValues[0].doubleValue();
|
||||
}
|
||||
};
|
||||
|
||||
private double[] randomData;
|
||||
private Expression expression;
|
||||
|
||||
@Param({"x", "func_identity(x)", "mh_identity", "native_identity", "cos(x)", "cos(x) + sin(x)"})
|
||||
String js;
|
||||
|
||||
@Setup(Level.Iteration)
|
||||
public void init() throws ParseException {
|
||||
ThreadLocalRandom random = ThreadLocalRandom.current();
|
||||
randomData = random.doubles().limit(1024).toArray();
|
||||
expression =
|
||||
Objects.equals(js, NATIVE_IDENTITY_NAME)
|
||||
? NATIVE_IDENTITY_EXPRESSION
|
||||
: JavascriptCompiler.compile(js, FUNCTIONS);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public double expression() throws IOException {
|
||||
var it = new ValuesIterator(randomData);
|
||||
var values = it.getDoubleValues();
|
||||
double result = 0d;
|
||||
while (it.next()) {
|
||||
result += expression.evaluate(values);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static final class ValuesIterator {
|
||||
final double[] data;
|
||||
final DoubleValues[] dv;
|
||||
int pos = -1;
|
||||
|
||||
ValuesIterator(double[] data) {
|
||||
this.data = data;
|
||||
var dv =
|
||||
new DoubleValues() {
|
||||
@Override
|
||||
public double doubleValue() throws IOException {
|
||||
return data[pos];
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean advanceExact(int doc) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
this.dv = new DoubleValues[] {dv};
|
||||
}
|
||||
|
||||
boolean next() {
|
||||
pos++;
|
||||
return (pos < data.length);
|
||||
}
|
||||
|
||||
DoubleValues[] getDoubleValues() {
|
||||
return dv;
|
||||
}
|
||||
}
|
||||
}
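For context on what this new benchmark measures, here is a hedged stand-alone sketch of compiling one expression and evaluating it against a single bound variable; the constant value for "x" is arbitrary:

import java.io.IOException;
import java.text.ParseException;
import org.apache.lucene.expressions.Expression;
import org.apache.lucene.expressions.js.JavascriptCompiler;
import org.apache.lucene.search.DoubleValues;

public final class ExpressionSketch {
  public static void main(String[] args) throws ParseException, IOException {
    Expression expr = JavascriptCompiler.compile("cos(x) + sin(x)");
    DoubleValues x =
        new DoubleValues() {
          @Override
          public double doubleValue() {
            return 0.5; // current value of "x"
          }

          @Override
          public boolean advanceExact(int doc) {
            return true;
          }
        };
    // The array is indexed by variable order; this expression only binds "x".
    System.out.println(expr.evaluate(new DoubleValues[] {x}));
  }
}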
|
|
@ -0,0 +1,176 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.benchmark.jmh;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.apache.lucene.codecs.lucene99.GroupVIntReader;
|
||||
import org.apache.lucene.codecs.lucene99.GroupVIntWriter;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.MMapDirectory;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.infra.Blackhole;
|
||||
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.MICROSECONDS)
|
||||
@State(Scope.Benchmark)
|
||||
@Warmup(iterations = 3, time = 3)
|
||||
@Measurement(iterations = 5, time = 5)
|
||||
@Fork(
|
||||
value = 1,
|
||||
jvmArgsPrepend = {"--add-modules=jdk.unsupported"})
|
||||
public class GroupVIntBenchmark {
|
||||
|
||||
// Cumulative frequency for each number of bits per value used by doc deltas of tail postings on
|
||||
// wikibigall.
|
||||
private static final float[] CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED =
|
||||
new float[] {
|
||||
0.0f,
|
||||
0.01026574f,
|
||||
0.021453038f,
|
||||
0.03342156f,
|
||||
0.046476692f,
|
||||
0.060890317f,
|
||||
0.07644147f,
|
||||
0.093718216f,
|
||||
0.11424741f,
|
||||
0.13989712f,
|
||||
0.17366524f,
|
||||
0.22071244f,
|
||||
0.2815692f,
|
||||
0.3537585f,
|
||||
0.43655503f,
|
||||
0.52308f,
|
||||
0.6104675f,
|
||||
0.7047371f,
|
||||
0.78155357f,
|
||||
0.8671179f,
|
||||
0.9740598f,
|
||||
1.0f
|
||||
};
|
||||
|
||||
final int maxSize = 256;
|
||||
final long[] values = new long[maxSize];
|
||||
|
||||
IndexInput byteBufferGVIntIn;
|
||||
IndexInput byteBufferVIntIn;
|
||||
|
||||
ByteArrayDataInput byteArrayVIntIn;
|
||||
ByteArrayDataInput byteArrayGVIntIn;
|
||||
|
||||
// @Param({"16", "32", "64", "128", "248"})
|
||||
@Param({"64"})
|
||||
public int size;
|
||||
|
||||
void initArrayInput(long[] docs) throws Exception {
|
||||
byte[] gVIntBytes = new byte[Integer.BYTES * maxSize * 2];
|
||||
byte[] vIntBytes = new byte[Integer.BYTES * maxSize * 2];
|
||||
ByteArrayDataOutput vIntOut = new ByteArrayDataOutput(vIntBytes);
|
||||
GroupVIntWriter w = new GroupVIntWriter();
|
||||
w.writeValues(new ByteArrayDataOutput(gVIntBytes), docs, docs.length);
|
||||
for (long v : docs) {
|
||||
vIntOut.writeVInt((int) v);
|
||||
}
|
||||
byteArrayVIntIn = new ByteArrayDataInput(vIntBytes);
|
||||
byteArrayGVIntIn = new ByteArrayDataInput(gVIntBytes);
|
||||
}
|
||||
|
||||
void initByteBufferInput(long[] docs) throws Exception {
|
||||
Directory dir = MMapDirectory.open(Files.createTempDirectory("groupvintdata"));
|
||||
IndexOutput vintOut = dir.createOutput("vint", IOContext.DEFAULT);
|
||||
IndexOutput gvintOut = dir.createOutput("gvint", IOContext.DEFAULT);
|
||||
|
||||
GroupVIntWriter w = new GroupVIntWriter();
|
||||
w.writeValues(gvintOut, docs, docs.length);
|
||||
for (long v : docs) {
|
||||
vintOut.writeVInt((int) v);
|
||||
}
|
||||
vintOut.close();
|
||||
gvintOut.close();
|
||||
byteBufferGVIntIn = dir.openInput("gvint", IOContext.DEFAULT);
|
||||
byteBufferVIntIn = dir.openInput("vint", IOContext.DEFAULT);
|
||||
}
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void init() throws Exception {
|
||||
long[] docs = new long[maxSize];
|
||||
Random r = new Random(0);
|
||||
for (int i = 0; i < maxSize; ++i) {
|
||||
float randomFloat = r.nextFloat();
|
||||
// Reproduce the distribution of the number of bits per values that we're observing for tail
|
||||
// postings on wikibigall.
|
||||
int numBits = 1 + Arrays.binarySearch(CUMULATIVE_FREQUENCY_BY_BITS_REQUIRED, randomFloat);
|
||||
if (numBits < 0) {
|
||||
numBits = -numBits;
|
||||
}
|
||||
docs[i] = r.nextInt(1 << (numBits - 1), 1 << numBits);
|
||||
}
|
||||
initByteBufferInput(docs);
|
||||
initArrayInput(docs);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void byteBufferReadVInt(Blackhole bh) throws IOException {
|
||||
byteBufferVIntIn.seek(0);
|
||||
for (int i = 0; i < size; i++) {
|
||||
values[i] = byteBufferVIntIn.readVInt();
|
||||
}
|
||||
bh.consume(values);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void byteBufferReadGroupVInt(Blackhole bh) throws IOException {
|
||||
byteBufferGVIntIn.seek(0);
|
||||
GroupVIntReader.readValues(byteBufferGVIntIn, values, size);
|
||||
bh.consume(values);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void byteArrayReadVInt(Blackhole bh) {
|
||||
byteArrayVIntIn.rewind();
|
||||
for (int i = 0; i < size; i++) {
|
||||
values[i] = byteArrayVIntIn.readVInt();
|
||||
}
|
||||
bh.consume(values);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void byteArrayReadGroupVInt(Blackhole bh) throws IOException {
|
||||
byteArrayGVIntIn.rewind();
|
||||
GroupVIntReader.readValues(byteArrayGVIntIn, values, size);
|
||||
bh.consume(values);
|
||||
}
|
||||
}
|
|
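A hypothetical way to launch the benchmarks in this new module programmatically through the jmh-core Runner API; the build may expose its own task for this, so treat the class below purely as a sketch:

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public final class RunGroupVIntBenchmark {
  public static void main(String[] args) throws RunnerException {
    Options opt =
        new OptionsBuilder()
            .include("GroupVIntBenchmark") // regex over benchmark class/method names
            .forks(1)
            .build();
    new Runner(opt).run();
  }
}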
@ -30,8 +30,8 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopScoreDocCollector;
|
||||
import org.apache.lucene.search.TopFieldCollectorManager;
|
||||
import org.apache.lucene.search.TopScoreDocCollectorManager;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
|
@ -110,15 +110,17 @@ public abstract class ReadTask extends PerfTask {
|
|||
// the IndexSearcher search methods that take
|
||||
// Weight public again, we can go back to
|
||||
// pulling the Weight ourselves:
|
||||
TopFieldCollector collector =
|
||||
TopFieldCollector.create(sort, numHits, withTotalHits() ? Integer.MAX_VALUE : 1);
|
||||
searcher.search(q, collector);
|
||||
hits = collector.topDocs();
|
||||
int totalHitsThreshold = withTotalHits() ? Integer.MAX_VALUE : 1;
|
||||
TopFieldCollectorManager collectorManager =
|
||||
new TopFieldCollectorManager(
|
||||
sort, numHits, null, totalHitsThreshold, searcher.getSlices().length > 1);
|
||||
hits = searcher.search(q, collectorManager);
|
||||
} else {
|
||||
hits = searcher.search(q, numHits);
|
||||
}
|
||||
} else {
|
||||
Collector collector = createCollector();
|
||||
|
||||
searcher.search(q, collector);
|
||||
// hits = collector.topDocs();
|
||||
}
|
||||
|
@ -183,7 +185,8 @@ public abstract class ReadTask extends PerfTask {
|
|||
}
|
||||
|
||||
protected Collector createCollector() throws Exception {
|
||||
return TopScoreDocCollector.create(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
|
||||
return new TopScoreDocCollectorManager(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1)
|
||||
.newCollector();
|
||||
}
|
||||
|
||||
protected Document retrieveDoc(StoredFields storedFields, int id) throws IOException {
|
||||
|
|
|
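The ReadTask hunk above moves from creating collectors directly to the collector-manager entry points. A minimal sketch of that style of search; the reader and query below are placeholders:

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollectorManager;

public final class CollectorManagerSketch {
  static TopDocs topTen(DirectoryReader reader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    // numHits = 10; totalHitsThreshold = Integer.MAX_VALUE counts all hits exactly.
    TopScoreDocCollectorManager manager =
        new TopScoreDocCollectorManager(10, Integer.MAX_VALUE);
    return searcher.search(new MatchAllDocsQuery(), manager);
  }
}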
@@ -207,7 +207,8 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {

private void updateFST(SortedMap<String, Double> weights) throws IOException {
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
FSTCompiler<Long> fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
FSTCompiler<Long> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
BytesRefBuilder scratchBytes = new BytesRefBuilder();
IntsRefBuilder scratchInts = new IntsRefBuilder();
for (Map.Entry<String, Double> entry : weights.entrySet()) {
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.lucene.codecs.blockterms;
|
||||
|
||||
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -154,7 +156,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
|
|||
public FieldIndexData(IndexInput in, FieldInfo fieldInfo, long indexStart) throws IOException {
|
||||
IndexInput clone = in.clone();
|
||||
clone.seek(indexStart);
|
||||
fst = new FST<>(clone, clone, fstOutputs);
|
||||
fst = new FST<>(readMetadata(clone, fstOutputs), clone);
|
||||
clone.close();
|
||||
|
||||
/*
|
||||
|
|
|
@ -238,7 +238,7 @@ public class VariableGapTermsIndexWriter extends TermsIndexWriterBase {
|
|||
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
|
||||
this.fieldInfo = fieldInfo;
|
||||
fstOutputs = PositiveIntOutputs.getSingleton();
|
||||
fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, fstOutputs);
|
||||
fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs).build();
|
||||
indexStart = out.getFilePointer();
|
||||
//// System.out.println("VGW: field=" + fieldInfo.name);
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.lucene.codecs.blocktreeords;
|
||||
|
||||
import static org.apache.lucene.util.fst.FST.readMetadata;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.codecs.blocktreeords.FSTOrdsOutputs.Output;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
|
@ -85,7 +87,7 @@ final class OrdsFieldReader extends Terms {
|
|||
final IndexInput clone = indexIn.clone();
|
||||
// System.out.println("start=" + indexStartFP + " field=" + fieldInfo.name);
|
||||
clone.seek(indexStartFP);
|
||||
index = new FST<>(clone, clone, OrdsBlockTreeTermsWriter.FST_OUTPUTS);
|
||||
index = new FST<>(readMetadata(clone, OrdsBlockTreeTermsWriter.FST_OUTPUTS), clone);
|
||||
|
||||
/*
|
||||
if (true) {
|
||||
|
|
|
@@ -194,7 +194,8 @@ public class FSTTermsReader extends FieldsProducer {
this.sumDocFreq = sumDocFreq;
this.docCount = docCount;
OffHeapFSTStore offHeapFSTStore = new OffHeapFSTStore();
this.dict = new FST<>(in, in, new FSTTermOutputs(fieldInfo), offHeapFSTStore);
FSTTermOutputs outputs = new FSTTermOutputs(fieldInfo);
this.dict = new FST<>(FST.readMetadata(in, outputs), in, offHeapFSTStore);
in.skipBytes(offHeapFSTStore.size());
}
@ -251,12 +251,12 @@ public class FSTTermsWriter extends FieldsConsumer {
|
|||
private final IntsRefBuilder scratchTerm = new IntsRefBuilder();
|
||||
private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance();
|
||||
|
||||
TermsWriter(FieldInfo fieldInfo) {
|
||||
TermsWriter(FieldInfo fieldInfo) throws IOException {
|
||||
this.numTerms = 0;
|
||||
this.fieldInfo = fieldInfo;
|
||||
postingsWriter.setField(fieldInfo);
|
||||
this.outputs = new FSTTermOutputs(fieldInfo);
|
||||
this.fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
this.fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
|
||||
}
|
||||
|
||||
public void finishTerm(BytesRef text, BlockTermState state) throws IOException {
|
||||
|
|
|
@ -683,7 +683,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
final PairOutputs<Long, Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
|
||||
final PairOutputs<PairOutputs.Pair<Long, Long>, PairOutputs.Pair<Long, Long>> outputs =
|
||||
new PairOutputs<>(outputsOuter, outputsInner);
|
||||
fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
|
||||
IndexInput in = SimpleTextFieldsReader.this.in.clone();
|
||||
in.seek(termsStart);
|
||||
final BytesRefBuilder lastTerm = new BytesRefBuilder();
|
||||
|
|
|
@@ -37,7 +37,6 @@ public class SimpleTextStoredFieldsFormat extends StoredFieldsFormat {
@Override
public StoredFieldsReader fieldsReader(
Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
;
return new SimpleTextStoredFieldsReader(directory, si, fn, context);
}
@ -89,10 +89,11 @@ public class FSTDictionary implements IndexDictionary {
|
|||
isFSTOnHeap = true;
|
||||
}
|
||||
PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
|
||||
FST.FSTMetadata<Long> metadata = FST.readMetadata(fstDataInput, fstOutputs);
|
||||
FST<Long> fst =
|
||||
isFSTOnHeap
|
||||
? new FST<>(fstDataInput, fstDataInput, fstOutputs)
|
||||
: new FST<>(fstDataInput, fstDataInput, fstOutputs, new OffHeapFSTStore());
|
||||
? new FST<>(metadata, fstDataInput)
|
||||
: new FST<>(metadata, fstDataInput, new OffHeapFSTStore());
|
||||
return new FSTDictionary(fst);
|
||||
}
|
||||
|
||||
|
@ -171,9 +172,9 @@ public class FSTDictionary implements IndexDictionary {
|
|||
protected final FSTCompiler<Long> fstCompiler;
|
||||
protected final IntsRefBuilder scratchInts;
|
||||
|
||||
public Builder() {
|
||||
public Builder() throws IOException {
|
||||
PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
|
||||
fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
|
||||
fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs).build();
|
||||
scratchInts = new IntsRefBuilder();
|
||||
}
|
||||
|
||||
|
|
|
@@ -100,5 +100,4 @@ public abstract class DelegatingAnalyzerWrapper extends AnalyzerWrapper {
}
}
}
;
}
@@ -70,7 +70,6 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {

/** Called after a doc and all its fields have been added. */
public void finishDocument() throws IOException {}
;

/**
* Called before writing the terms of the field. {@link #startTerm(BytesRef, int)} will be called
@@ -82,7 +81,6 @@ public abstract class TermVectorsWriter implements Closeable, Accountable {

/** Called after a field and all its terms have been added. */
public void finishField() throws IOException {}
;

/**
* Adds a term and its term frequency <code>freq</code>. If this field has positions and/or
@ -91,7 +91,11 @@ public final class FieldReader extends Terms {
|
|||
// Initialize FST always off-heap.
|
||||
final IndexInput clone = indexIn.clone();
|
||||
clone.seek(indexStartFP);
|
||||
index = new FST<>(metaIn, clone, ByteSequenceOutputs.getSingleton(), new OffHeapFSTStore());
|
||||
index =
|
||||
new FST<>(
|
||||
FST.readMetadata(metaIn, ByteSequenceOutputs.getSingleton()),
|
||||
clone,
|
||||
new OffHeapFSTStore());
|
||||
/*
|
||||
if (false) {
|
||||
final String dotFileName = segment + "_" + fieldInfo.name + ".dot";
|
||||
|
|
|
@ -30,9 +30,7 @@ import org.apache.lucene.util.StringHelper;
|
|||
import org.apache.lucene.util.automaton.ByteRunnable;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
import org.apache.lucene.util.automaton.TransitionAccessor;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Outputs;
|
||||
|
||||
/**
|
||||
* This is used to implement efficient {@link Terms#intersect} for block-tree. Note that it cannot
|
||||
|
@ -46,7 +44,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
// static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
|
||||
final IndexInput in;
|
||||
static final Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
|
||||
|
||||
IntersectTermsEnumFrame[] stack;
|
||||
|
||||
|
@ -68,6 +65,9 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
|
||||
private BytesRef savedStartTerm;
|
||||
|
||||
private final SegmentTermsEnum.OutputAccumulator outputAccumulator =
|
||||
new SegmentTermsEnum.OutputAccumulator();
|
||||
|
||||
// TODO: in some cases we can filter by length? eg
|
||||
// regexp foo*bar must be at least length 6 bytes
|
||||
public IntersectTermsEnum(
|
||||
|
@ -114,7 +114,6 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
f.prefix = 0;
|
||||
f.setState(0);
|
||||
f.arc = arc;
|
||||
f.outputPrefix = arc.output();
|
||||
f.load(fr.rootCode);
|
||||
|
||||
// for assert:
|
||||
|
@ -184,7 +183,9 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
FST.Arc<BytesRef> arc = currentFrame.arc;
|
||||
int idx = currentFrame.prefix;
|
||||
assert currentFrame.suffix > 0;
|
||||
BytesRef output = currentFrame.outputPrefix;
|
||||
|
||||
outputAccumulator.reset();
|
||||
outputAccumulator.push(arc.output());
|
||||
while (idx < f.prefix) {
|
||||
final int target = term.bytes[idx] & 0xff;
|
||||
// TODO: we could be more efficient for the next()
|
||||
|
@ -192,14 +193,14 @@ final class IntersectTermsEnum extends BaseTermsEnum {
|
|||
// passed to findTargetArc
|
||||
arc = fr.index.findTargetArc(target, arc, getArc(1 + idx), fstReader);
|
||||
assert arc != null;
|
||||
output = fstOutputs.add(output, arc.output());
|
||||
outputAccumulator.push(arc.output());
|
||||
idx++;
|
||||
}
|
||||
|
||||
f.arc = arc;
|
||||
f.outputPrefix = output;
|
||||
assert arc.isFinal();
|
||||
f.load(fstOutputs.add(output, arc.nextFinalOutput()));
|
||||
outputAccumulator.push(arc.nextFinalOutput());
|
||||
f.load(outputAccumulator);
|
||||
return f;
|
||||
}
|
||||
|
||||
|
|
|
@ -55,7 +55,6 @@ final class IntersectTermsEnumFrame {
|
|||
int statsSingletonRunLength = 0;
|
||||
final ByteArrayDataInput statsReader = new ByteArrayDataInput();
|
||||
|
||||
byte[] floorData = new byte[32];
|
||||
final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
|
||||
|
||||
// Length of prefix shared by all terms in this block
|
||||
|
@ -90,9 +89,6 @@ final class IntersectTermsEnumFrame {
|
|||
|
||||
final ByteArrayDataInput bytesReader = new ByteArrayDataInput();
|
||||
|
||||
// Cumulative output so far
|
||||
BytesRef outputPrefix;
|
||||
|
||||
int startBytePos;
|
||||
int suffix;
|
||||
|
||||
|
@ -120,7 +116,7 @@ final class IntersectTermsEnumFrame {
|
|||
}
|
||||
} while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min);
|
||||
|
||||
load(null);
|
||||
load((Long) null);
|
||||
}
|
||||
|
||||
public void setState(int state) {
|
||||
|
@ -142,12 +138,22 @@ final class IntersectTermsEnumFrame {
|
|||
}
|
||||
|
||||
void load(BytesRef frameIndexData) throws IOException {
|
||||
if (frameIndexData != null) {
|
||||
floorDataReader.reset(frameIndexData.bytes, frameIndexData.offset, frameIndexData.length);
|
||||
// Skip first long -- has redundant fp, hasTerms
|
||||
// flag, isFloor flag
|
||||
final long code = ite.fr.readVLongOutput(floorDataReader);
|
||||
if ((code & Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) {
|
||||
floorDataReader.reset(frameIndexData.bytes, frameIndexData.offset, frameIndexData.length);
|
||||
load(ite.fr.readVLongOutput(floorDataReader));
|
||||
}
|
||||
|
||||
void load(SegmentTermsEnum.OutputAccumulator outputAccumulator) throws IOException {
|
||||
outputAccumulator.prepareRead();
|
||||
long code = ite.fr.readVLongOutput(outputAccumulator);
|
||||
outputAccumulator.setFloorData(floorDataReader);
|
||||
load(code);
|
||||
}
|
||||
|
||||
void load(Long blockCode) throws IOException {
|
||||
if (blockCode != null) {
|
||||
// This block is the first one in a possible sequence of floor blocks corresponding to a
|
||||
// single seek point from the FST terms index
|
||||
if ((blockCode & Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) {
|
||||
// Floor frame
|
||||
numFollowFloorBlocks = floorDataReader.readVInt();
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.lucene.codecs.lucene90.blocktree;
|
||||
|
||||
import static org.apache.lucene.util.fst.FSTCompiler.getOnHeapReaderWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
@ -525,7 +527,7 @@ public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// Disable suffixes sharing for block tree index because suffixes are mostly dropped
|
||||
// from the FST index and left in the term blocks.
|
||||
.suffixRAMLimitMB(0d)
|
||||
.bytesPageBits(pageBits)
|
||||
.dataOutput(getOnHeapReaderWriter(pageBits))
|
||||
.build();
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" compile index for prefix=" + prefix);
|
||||
|
|
|
@ -24,6 +24,7 @@ import org.apache.lucene.index.ImpactsEnum;
|
|||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -48,7 +49,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
// static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
|
||||
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
|
||||
private final OutputAccumulator outputAccumulator = new OutputAccumulator();
|
||||
|
||||
// What prefix of the current term was present in the index; when we only next() through the
|
||||
// index, this stays at 0. It's only set when
|
||||
|
@ -232,18 +233,24 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
return arcs[ord];
|
||||
}
|
||||
|
||||
// Pushes a frame we seek'd to
|
||||
SegmentTermsEnumFrame pushFrame(FST.Arc<BytesRef> arc, BytesRef frameData, int length)
|
||||
throws IOException {
|
||||
scratchReader.reset(frameData.bytes, frameData.offset, frameData.length);
|
||||
final long code = fr.readVLongOutput(scratchReader);
|
||||
outputAccumulator.reset();
|
||||
outputAccumulator.push(frameData);
|
||||
return pushFrame(arc, length);
|
||||
}
|
||||
|
||||
// Pushes a frame we seek'd to
|
||||
SegmentTermsEnumFrame pushFrame(FST.Arc<BytesRef> arc, int length) throws IOException {
|
||||
outputAccumulator.prepareRead();
|
||||
final long code = fr.readVLongOutput(outputAccumulator);
|
||||
final long fpSeek = code >>> Lucene90BlockTreeTermsReader.OUTPUT_FLAGS_NUM_BITS;
|
||||
final SegmentTermsEnumFrame f = getFrame(1 + currentFrame.ord);
|
||||
f.hasTerms = (code & Lucene90BlockTreeTermsReader.OUTPUT_FLAG_HAS_TERMS) != 0;
|
||||
f.hasTermsOrig = f.hasTerms;
|
||||
f.isFloor = (code & Lucene90BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0;
|
||||
if (f.isFloor) {
|
||||
f.setFloorData(scratchReader, frameData);
|
||||
f.setFloorData(outputAccumulator);
|
||||
}
|
||||
pushFrame(arc, fpSeek, length);
|
||||
|
||||
|
@ -344,9 +351,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
FST.Arc<BytesRef> arc;
|
||||
int targetUpto;
|
||||
BytesRef output;
|
||||
|
||||
targetBeforeCurrentLength = currentFrame.ord;
|
||||
outputAccumulator.reset();
|
||||
|
||||
if (currentFrame != staticFrame) {
|
||||
|
||||
|
@ -363,7 +370,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output();
|
||||
outputAccumulator.push(arc.output());
|
||||
targetUpto = 0;
|
||||
|
||||
SegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -373,9 +380,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
int cmp = 0;
|
||||
|
||||
// TODO: reverse vLong byte order for better FST
|
||||
// prefix output sharing
|
||||
|
||||
// First compare up to valid seek frames:
|
||||
while (targetUpto < targetLimit) {
|
||||
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
|
@ -394,9 +398,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
+ (char) arc.label()
|
||||
+ " targetLabel="
|
||||
+ (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
if (arc.output() != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
outputAccumulator.push(arc.output());
|
||||
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1 + lastFrame.ord];
|
||||
}
|
||||
|
@ -484,15 +487,15 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
// System.out.println(" no seek state; push root frame");
|
||||
// }
|
||||
|
||||
output = arc.output();
|
||||
outputAccumulator.push(arc.output());
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
// term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame =
|
||||
pushFrame(
|
||||
arc, Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
outputAccumulator.push(arc.nextFinalOutput());
|
||||
currentFrame = pushFrame(arc, 0);
|
||||
outputAccumulator.pop();
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -554,9 +557,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
term.setByteAt(targetUpto, (byte) targetLabel);
|
||||
// Aggregate output as we go:
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
outputAccumulator.push(arc.output());
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" index: follow label=" + toHex(target.bytes[target.offset +
|
||||
|
@ -566,11 +567,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
// if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame =
|
||||
pushFrame(
|
||||
arc,
|
||||
Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()),
|
||||
targetUpto);
|
||||
outputAccumulator.push(arc.nextFinalOutput());
|
||||
currentFrame = pushFrame(arc, targetUpto);
|
||||
outputAccumulator.pop();
|
||||
// if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
|
||||
// currentFrame.hasTerms);
|
||||
}
|
||||
|
@ -630,9 +629,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
FST.Arc<BytesRef> arc;
|
||||
int targetUpto;
|
||||
BytesRef output;
|
||||
|
||||
targetBeforeCurrentLength = currentFrame.ord;
|
||||
outputAccumulator.reset();
|
||||
|
||||
if (currentFrame != staticFrame) {
|
||||
|
||||
|
@ -649,7 +648,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
arc = arcs[0];
|
||||
assert arc.isFinal();
|
||||
output = arc.output();
|
||||
outputAccumulator.push(arc.output());
|
||||
targetUpto = 0;
|
||||
|
||||
SegmentTermsEnumFrame lastFrame = stack[0];
|
||||
|
@ -659,9 +658,6 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
int cmp = 0;
|
||||
|
||||
// TODO: we should write our vLong backwards (MSB
|
||||
// first) to get better sharing from the FST
|
||||
|
||||
// First compare up to valid seek frames:
|
||||
while (targetUpto < targetLimit) {
|
||||
cmp = (term.byteAt(targetUpto) & 0xFF) - (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
|
@ -680,14 +676,8 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
+ (char) arc.label()
|
||||
+ " targetLabel="
|
||||
+ (char) (target.bytes[target.offset + targetUpto] & 0xFF);
|
||||
// TODO: we could save the outputs in local
|
||||
// byte[][] instead of making new objs ever
|
||||
// seek; but, often the FST doesn't have any
|
||||
// shared bytes (but this could change if we
|
||||
// reverse vLong byte order)
|
||||
if (arc.output() != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
|
||||
outputAccumulator.push(arc.output());
|
||||
if (arc.isFinal()) {
|
||||
lastFrame = stack[1 + lastFrame.ord];
|
||||
}
|
||||
|
@ -769,15 +759,15 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
// System.out.println(" no seek state; push root frame");
|
||||
// }
|
||||
|
||||
output = arc.output();
|
||||
outputAccumulator.push(arc.output());
|
||||
|
||||
currentFrame = staticFrame;
|
||||
|
||||
// term.length = 0;
|
||||
targetUpto = 0;
|
||||
currentFrame =
|
||||
pushFrame(
|
||||
arc, Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()), 0);
|
||||
outputAccumulator.push(arc.nextFinalOutput());
|
||||
currentFrame = pushFrame(arc, 0);
|
||||
outputAccumulator.pop();
|
||||
}
|
||||
|
||||
// if (DEBUG) {
|
||||
|
@ -839,9 +829,7 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
arc = nextArc;
|
||||
// Aggregate output as we go:
|
||||
assert arc.output() != null;
|
||||
if (arc.output() != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
|
||||
output = Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.output());
|
||||
}
|
||||
outputAccumulator.push(arc.output());
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" index: follow label=" + (target.bytes[target.offset +
|
||||
|
@ -851,11 +839,9 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
|
||||
if (arc.isFinal()) {
|
||||
// if (DEBUG) System.out.println(" arc is final!");
|
||||
currentFrame =
|
||||
pushFrame(
|
||||
arc,
|
||||
Lucene90BlockTreeTermsReader.FST_OUTPUTS.add(output, arc.nextFinalOutput()),
|
||||
targetUpto);
|
||||
outputAccumulator.push(arc.nextFinalOutput());
|
||||
currentFrame = pushFrame(arc, targetUpto);
|
||||
outputAccumulator.pop();
|
||||
// if (DEBUG) System.out.println(" curFrame.ord=" + currentFrame.ord + " hasTerms=" +
|
||||
// currentFrame.hasTerms);
|
||||
}
|
||||
|
@ -1190,4 +1176,68 @@ final class SegmentTermsEnum extends BaseTermsEnum {
|
|||
public long ord() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
static class OutputAccumulator extends DataInput {
|
||||
|
||||
BytesRef[] outputs = new BytesRef[16];
|
||||
BytesRef current;
|
||||
int num;
|
||||
int outputIndex;
|
||||
int index;
|
||||
|
||||
void push(BytesRef output) {
|
||||
if (output != Lucene90BlockTreeTermsReader.NO_OUTPUT) {
|
||||
outputs = ArrayUtil.grow(outputs, num + 1);
|
||||
outputs[num++] = output;
|
||||
}
|
||||
}
|
||||
|
||||
void pop() {
|
||||
assert num > 0;
|
||||
num--;
|
||||
}
|
||||
|
||||
void reset() {
|
||||
num = 0;
|
||||
}
|
||||
|
||||
void prepareRead() {
|
||||
index = 0;
|
||||
outputIndex = 0;
|
||||
current = outputs[0];
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the last arc as the source of the floorData. This won't change the reading position of
|
||||
* this {@link OutputAccumulator}
|
||||
*/
|
||||
void setFloorData(ByteArrayDataInput floorData) {
|
||||
assert outputIndex == num - 1
|
||||
: "floor data should be stored in last arc, get outputIndex: "
|
||||
+ outputIndex
|
||||
+ ", num: "
|
||||
+ num;
|
||||
BytesRef output = outputs[outputIndex];
|
||||
floorData.reset(output.bytes, output.offset + index, output.length - index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte readByte() throws IOException {
|
||||
if (index >= current.length) {
|
||||
current = outputs[++outputIndex];
|
||||
index = 0;
|
||||
}
|
||||
return current.bytes[current.offset + index++];
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readBytes(byte[] b, int offset, int len) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skipBytes(long numBytes) throws IOException {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
}
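The new OutputAccumulator lets the block-tree reader decode a vLong that spans several FST arc outputs without concatenating them first. The stand-alone class below only illustrates that idea; it is not Lucene API, and it uses the plain LSB-first vLong layout of DataInput rather than the codec's own encoding:

import java.util.ArrayList;
import java.util.List;

final class ChunkedVLongReader {
  private final List<byte[]> chunks = new ArrayList<>();
  private int chunkIndex;
  private int pos;

  void push(byte[] chunk) {
    if (chunk.length > 0) { // mirror OutputAccumulator: empty outputs are skipped
      chunks.add(chunk);
    }
  }

  byte readByte() {
    while (pos >= chunks.get(chunkIndex).length) {
      chunkIndex++; // hop to the next accumulated chunk, like readByte() above
      pos = 0;
    }
    return chunks.get(chunkIndex)[pos++];
  }

  // 7 bits per byte, high bit set on all but the last byte (DataInput#readVLong layout).
  long readVLong() {
    byte b = readByte();
    long value = b & 0x7FL;
    for (int shift = 7; (b & 0x80) != 0; shift += 7) {
      b = readByte();
      value |= (b & 0x7FL) << shift;
    }
    return value;
  }
}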
|
||||
|
|
|
@ -55,7 +55,7 @@ final class SegmentTermsEnumFrame {
|
|||
int statsSingletonRunLength = 0;
|
||||
final ByteArrayDataInput statsReader = new ByteArrayDataInput();
|
||||
|
||||
byte[] floorData = new byte[32];
|
||||
int rewindPos;
|
||||
final ByteArrayDataInput floorDataReader = new ByteArrayDataInput();
|
||||
|
||||
// Length of prefix shared by all terms in this block
|
||||
|
@ -104,13 +104,9 @@ final class SegmentTermsEnumFrame {
|
|||
suffixLengthsReader = new ByteArrayDataInput();
|
||||
}
|
||||
|
||||
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
|
||||
final int numBytes = source.length - (in.getPosition() - source.offset);
|
||||
if (numBytes > floorData.length) {
|
||||
floorData = new byte[ArrayUtil.oversize(numBytes, 1)];
|
||||
}
|
||||
System.arraycopy(source.bytes, source.offset + in.getPosition(), floorData, 0, numBytes);
|
||||
floorDataReader.reset(floorData, 0, numBytes);
|
||||
public void setFloorData(SegmentTermsEnum.OutputAccumulator outputAccumulator) {
|
||||
outputAccumulator.setFloorData(floorDataReader);
|
||||
rewindPos = floorDataReader.getPosition();
|
||||
numFollowFloorBlocks = floorDataReader.readVInt();
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
// if (DEBUG) {
|
||||
|
@ -247,7 +243,7 @@ final class SegmentTermsEnumFrame {
|
|||
nextEnt = -1;
|
||||
hasTerms = hasTermsOrig;
|
||||
if (isFloor) {
|
||||
floorDataReader.rewind();
|
||||
floorDataReader.setPosition(rewindPos);
|
||||
numFollowFloorBlocks = floorDataReader.readVInt();
|
||||
assert numFollowFloorBlocks > 0;
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.DataInput;
|
||||
|
||||
/** Decode integers using group-varint. */
|
||||
public class GroupVIntReader {
|
||||
|
||||
public static void readValues(DataInput in, long[] docs, int limit) throws IOException {
|
||||
int i;
|
||||
for (i = 0; i <= limit - 4; i += 4) {
|
||||
final int flag = in.readByte() & 0xFF;
|
||||
|
||||
final int n1Minus1 = flag >> 6;
|
||||
final int n2Minus1 = (flag >> 4) & 0x03;
|
||||
final int n3Minus1 = (flag >> 2) & 0x03;
|
||||
final int n4Minus1 = flag & 0x03;
|
||||
|
||||
docs[i] = readLong(in, n1Minus1);
|
||||
docs[i + 1] = readLong(in, n2Minus1);
|
||||
docs[i + 2] = readLong(in, n3Minus1);
|
||||
docs[i + 3] = readLong(in, n4Minus1);
|
||||
}
|
||||
for (; i < limit; ++i) {
|
||||
docs[i] = in.readVInt();
|
||||
}
|
||||
}
|
||||
|
||||
private static long readLong(DataInput in, int numBytesMinus1) throws IOException {
|
||||
switch (numBytesMinus1) {
|
||||
case 0:
|
||||
return in.readByte() & 0xFFL;
|
||||
case 1:
|
||||
return in.readShort() & 0xFFFFL;
|
||||
case 2:
|
||||
return (in.readShort() & 0xFFFFL) | ((in.readByte() & 0xFFL) << 16);
|
||||
default:
|
||||
return in.readInt() & 0xFFFFFFFFL;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.codecs.lucene99;
|
||||
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
/**
|
||||
* Encode integers using group-varint. It uses VInt to encode tail values that are not enough for a
|
||||
* group
|
||||
*/
|
||||
public class GroupVIntWriter {
|
||||
|
||||
// the maximum size of one group is 4 integers + 1 byte flag.
|
||||
private byte[] bytes = new byte[17];
|
||||
private int byteOffset = 0;
|
||||
|
||||
public GroupVIntWriter() {}
|
||||
|
||||
private int encodeValue(int v) {
|
||||
int lastOff = byteOffset;
|
||||
do {
|
||||
bytes[byteOffset++] = (byte) (v & 0xFF);
|
||||
v >>>= 8;
|
||||
} while (v != 0);
|
||||
return byteOffset - lastOff;
|
||||
}
|
||||
|
||||
public void writeValues(DataOutput out, long[] values, int limit) throws IOException {
|
||||
int off = 0;
|
||||
|
||||
// encode each group
|
||||
while ((limit - off) >= 4) {
|
||||
byte flag = 0;
|
||||
byteOffset = 1;
|
||||
flag |= (encodeValue((int) values[off++]) - 1) << 6;
|
||||
flag |= (encodeValue((int) values[off++]) - 1) << 4;
|
||||
flag |= (encodeValue((int) values[off++]) - 1) << 2;
|
||||
flag |= (encodeValue((int) values[off++]) - 1);
|
||||
bytes[0] = flag;
|
||||
out.writeBytes(bytes, byteOffset);
|
||||
}
|
||||
|
||||
// tail vints
|
||||
for (; off < limit; off++) {
|
||||
out.writeVInt((int) values[off]);
|
||||
}
|
||||
}
|
||||
}
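A small round-trip sketch for the two new classes, mirroring what GroupVIntBenchmark#initArrayInput does: encode a block of values with GroupVIntWriter, then decode it back with GroupVIntReader. The sample values are arbitrary:

import java.io.IOException;
import org.apache.lucene.codecs.lucene99.GroupVIntReader;
import org.apache.lucene.codecs.lucene99.GroupVIntWriter;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;

public final class GroupVIntRoundTrip {
  public static void main(String[] args) throws IOException {
    long[] values = {3, 290, 70000, 5, 17}; // first 4 form one group, the 5th is a tail VInt
    byte[] buffer = new byte[Integer.BYTES * values.length * 2]; // sized as in the benchmark

    GroupVIntWriter writer = new GroupVIntWriter();
    writer.writeValues(new ByteArrayDataOutput(buffer), values, values.length);

    long[] decoded = new long[values.length];
    GroupVIntReader.readValues(new ByteArrayDataInput(buffer), decoded, values.length);
    // decoded now holds the same values that were written
  }
}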
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.codecs.KnnVectorsReader;
|
|||
import org.apache.lucene.codecs.KnnVectorsWriter;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.search.TaskExecutor;
|
||||
import org.apache.lucene.util.hnsw.HnswGraph;
|
||||
|
||||
/**
|
||||
|
@ -60,7 +61,7 @@ public final class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFo
|
|||
private final FlatVectorsFormat flatVectorsFormat;
|
||||
|
||||
private final int numMergeWorkers;
|
||||
private final ExecutorService mergeExec;
|
||||
private final TaskExecutor mergeExec;
|
||||
|
||||
/** Constructs a format using default graph construction parameters */
|
||||
public Lucene99HnswScalarQuantizedVectorsFormat() {
|
||||
|
@ -84,8 +85,8 @@ public final class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFo
|
|||
* @param beamWidth the size of the queue maintained during graph construction.
|
||||
* @param numMergeWorkers number of workers (threads) that will be used when doing merge. If
|
||||
* larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec
|
||||
* @param configuredQuantile the quantile for scalar quantizing the vectors, when `null` it is
|
||||
* calculated based on the vector field dimensions.
|
||||
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
|
||||
* it is calculated based on the vector field dimensions.
|
||||
* @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are
|
||||
* generated by this format to do the merge
|
||||
*/
|
||||
|
@ -93,7 +94,7 @@ public final class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFo
|
|||
int maxConn,
|
||||
int beamWidth,
|
||||
int numMergeWorkers,
|
||||
Float configuredQuantile,
|
||||
Float confidenceInterval,
|
||||
ExecutorService mergeExec) {
|
||||
super("Lucene99HnswScalarQuantizedVectorsFormat");
|
||||
if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) {
|
||||
|
@ -121,8 +122,12 @@ public final class Lucene99HnswScalarQuantizedVectorsFormat extends KnnVectorsFo
|
|||
"No executor service is needed as we'll use single thread to merge");
|
||||
}
|
||||
this.numMergeWorkers = numMergeWorkers;
|
||||
this.mergeExec = mergeExec;
|
||||
this.flatVectorsFormat = new Lucene99ScalarQuantizedVectorsFormat(configuredQuantile);
|
||||
if (mergeExec != null) {
|
||||
this.mergeExec = new TaskExecutor(mergeExec);
|
||||
} else {
|
||||
this.mergeExec = null;
|
||||
}
|
||||
this.flatVectorsFormat = new Lucene99ScalarQuantizedVectorsFormat(confidenceInterval);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.codecs.lucene90.IndexedDISI;
|
|||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TaskExecutor;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.hnsw.HnswGraph;
|
||||
|
||||
|
@ -137,7 +138,7 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
|
|||
private static final FlatVectorsFormat flatVectorsFormat = new Lucene99FlatVectorsFormat();
|
||||
|
||||
private final int numMergeWorkers;
|
||||
private final ExecutorService mergeExec;
|
||||
private final TaskExecutor mergeExec;
|
||||
|
||||
/** Constructs a format using default graph construction parameters */
|
||||
public Lucene99HnswVectorsFormat() {
|
||||
|
@ -192,7 +193,11 @@ public final class Lucene99HnswVectorsFormat extends KnnVectorsFormat {
|
|||
"No executor service is needed as we'll use single thread to merge");
|
||||
}
|
||||
this.numMergeWorkers = numMergeWorkers;
|
||||
this.mergeExec = mergeExec;
|
||||
if (mergeExec != null) {
|
||||
this.mergeExec = new TaskExecutor(mergeExec);
|
||||
} else {
|
||||
this.mergeExec = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -92,18 +92,8 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
} catch (Throwable exception) {
|
||||
priorE = exception;
|
||||
} finally {
|
||||
try {
|
||||
CodecUtil.checkFooter(meta, priorE);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success == false) {
|
||||
IOUtils.close(flatVectorsReader);
|
||||
}
|
||||
}
|
||||
CodecUtil.checkFooter(meta, priorE);
|
||||
}
|
||||
}
|
||||
success = false;
|
||||
try {
|
||||
vectorIndex =
|
||||
openDataInput(
|
||||
state,
|
||||
|
@ -237,12 +227,22 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
|| fieldEntry.vectorEncoding != VectorEncoding.FLOAT32) {
|
||||
return;
|
||||
}
|
||||
RandomVectorScorer scorer = flatVectorsReader.getRandomVectorScorer(field, target);
|
||||
HnswGraphSearcher.search(
|
||||
scorer,
|
||||
new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc),
|
||||
getGraph(fieldEntry),
|
||||
scorer.getAcceptOrds(acceptDocs));
|
||||
final RandomVectorScorer scorer = flatVectorsReader.getRandomVectorScorer(field, target);
|
||||
final KnnCollector collector =
|
||||
new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
|
||||
final Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs);
|
||||
if (knnCollector.k() < scorer.maxOrd()) {
|
||||
HnswGraphSearcher.search(scorer, collector, getGraph(fieldEntry), acceptedOrds);
|
||||
} else {
|
||||
// if k is larger than the number of vectors, we can just iterate over all vectors
|
||||
// and collect them
|
||||
for (int i = 0; i < scorer.maxOrd(); i++) {
|
||||
if (acceptedOrds == null || acceptedOrds.get(i)) {
|
||||
knnCollector.incVisitedCount(1);
|
||||
knnCollector.collect(scorer.ordToDoc(i), scorer.score(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -255,12 +255,22 @@ public final class Lucene99HnswVectorsReader extends KnnVectorsReader
|
|||
|| fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
|
||||
return;
|
||||
}
|
||||
RandomVectorScorer scorer = flatVectorsReader.getRandomVectorScorer(field, target);
|
||||
HnswGraphSearcher.search(
|
||||
scorer,
|
||||
new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc),
|
||||
getGraph(fieldEntry),
|
||||
scorer.getAcceptOrds(acceptDocs));
|
||||
final RandomVectorScorer scorer = flatVectorsReader.getRandomVectorScorer(field, target);
|
||||
final KnnCollector collector =
|
||||
new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc);
|
||||
final Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs);
|
||||
if (knnCollector.k() < scorer.maxOrd()) {
|
||||
HnswGraphSearcher.search(scorer, collector, getGraph(fieldEntry), acceptedOrds);
|
||||
} else {
|
||||
// if k is larger than the number of vectors, we can just iterate over all vectors
|
||||
// and collect them
|
||||
for (int i = 0; i < scorer.maxOrd(); i++) {
|
||||
if (acceptedOrds == null || acceptedOrds.get(i)) {
|
||||
knnCollector.incVisitedCount(1);
|
||||
knnCollector.collect(scorer.ordToDoc(i), scorer.score(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
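
The two hunks above add the same fallback for float and byte vectors: run the HNSW graph search only when k is smaller than the number of indexed vectors, otherwise score every ordinal directly. A self-contained sketch of that fallback, with a simplified interface standing in for RandomVectorScorer (the interface and class names here are illustrative, not the real API):

  import java.io.IOException;
  import org.apache.lucene.search.KnnCollector;
  import org.apache.lucene.util.Bits;

  final class ExhaustiveFallback {
    // Simplified stand-in for the scorer used in the hunks above.
    interface OrdScorer {
      int maxOrd();
      int ordToDoc(int ord);
      float score(int ord) throws IOException;
    }

    // When k >= maxOrd, visiting every ordinal is exact and cheaper than graph traversal.
    static void collectAll(OrdScorer scorer, KnnCollector collector, Bits acceptedOrds)
        throws IOException {
      for (int ord = 0; ord < scorer.maxOrd(); ord++) {
        if (acceptedOrds == null || acceptedOrds.get(ord)) {
          collector.incVisitedCount(1);
          collector.collect(scorer.ordToDoc(ord), scorer.score(ord));
        }
      }
    }
  }
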
@Override
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FlatVectorsWriter;
|
||||
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
|
||||
|
@ -35,6 +34,7 @@ import org.apache.lucene.index.MergeState;
|
|||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.index.Sorter;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.TaskExecutor;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.InfoStream;
|
||||
|
@ -67,7 +67,7 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
|
|||
private final int beamWidth;
|
||||
private final FlatVectorsWriter flatVectorWriter;
|
||||
private final int numMergeWorkers;
|
||||
private final ExecutorService mergeExec;
|
||||
private final TaskExecutor mergeExec;
|
||||
|
||||
private final List<FieldWriter<?>> fields = new ArrayList<>();
|
||||
private boolean finished;
|
||||
|
@ -78,7 +78,7 @@ public final class Lucene99HnswVectorsWriter extends KnnVectorsWriter {
|
|||
int beamWidth,
|
||||
FlatVectorsWriter flatVectorWriter,
|
||||
int numMergeWorkers,
|
||||
ExecutorService mergeExec)
|
||||
TaskExecutor mergeExec)
|
||||
throws IOException {
|
||||
this.M = M;
|
||||
this.flatVectorWriter = flatVectorWriter;
|
||||
|
|
|
@ -158,8 +158,8 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* <dd><b>Frequencies and Skip Data</b>
|
||||
* <p>The .doc file contains the lists of documents which contain each term, along with the
|
||||
* frequency of the term in that document (except when frequencies are omitted: {@link
|
||||
* IndexOptions#DOCS}). It also saves skip data to the beginning of each packed or VInt block,
|
||||
* when the length of document list is larger than packed block size.
|
||||
* IndexOptions#DOCS}). Skip data is saved at the end of each term's postings. The skip data
|
||||
* is saved once for the entire postings list.
|
||||
* <ul>
|
||||
* <li>docFile(.doc) --> Header, <TermFreqs, SkipData?><sup>TermCount</sup>, Footer
|
||||
* <li>Header --> {@link CodecUtil#writeIndexHeader IndexHeader}
|
||||
|
@ -174,7 +174,8 @@ import org.apache.lucene.util.packed.PackedInts;
|
|||
* <li>SkipDatum --> DocSkip, DocFPSkip, <PosFPSkip, PosBlockOffset, PayLength?,
|
||||
* PayFPSkip?>?, ImpactLength, <CompetitiveFreqDelta, CompetitiveNormDelta?>
|
||||
* <sup>ImpactCount</sup>, SkipChildLevelPointer?
|
||||
* <li>PackedDocDeltaBlock, PackedFreqBlock --> {@link PackedInts PackedInts}
|
||||
* <li>PackedFreqBlock --> {@link PackedInts PackedInts}, uses patching
|
||||
* <li>PackedDocDeltaBlock --> {@link PackedInts PackedInts}, does not use patching
|
||||
* <li>DocDelta, Freq, DocSkip, DocFPSkip, PosFPSkip, PosBlockOffset, PayByteUpto,
|
||||
* PayFPSkip, ImpactLength, CompetitiveFreqDelta --> {@link DataOutput#writeVInt
|
||||
* VInt}
|
||||
|
|
|
@ -142,21 +142,25 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
/** Read values that have been written using variable-length encoding instead of bit-packing. */
|
||||
static void readVIntBlock(
|
||||
IndexInput docIn, long[] docBuffer, long[] freqBuffer, int num, boolean indexHasFreq)
|
||||
IndexInput docIn,
|
||||
long[] docBuffer,
|
||||
long[] freqBuffer,
|
||||
int num,
|
||||
boolean indexHasFreq,
|
||||
boolean decodeFreq)
|
||||
throws IOException {
|
||||
if (indexHasFreq) {
|
||||
for (int i = 0; i < num; i++) {
|
||||
final int code = docIn.readVInt();
|
||||
docBuffer[i] = code >>> 1;
|
||||
if ((code & 1) != 0) {
|
||||
freqBuffer[i] = 1;
|
||||
} else {
|
||||
GroupVIntReader.readValues(docIn, docBuffer, num);
|
||||
if (indexHasFreq && decodeFreq) {
|
||||
for (int i = 0; i < num; ++i) {
|
||||
freqBuffer[i] = docBuffer[i] & 0x01;
|
||||
docBuffer[i] >>= 1;
|
||||
if (freqBuffer[i] == 0) {
|
||||
freqBuffer[i] = docIn.readVInt();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < num; i++) {
|
||||
docBuffer[i] = docIn.readVInt();
|
||||
} else if (indexHasFreq) {
|
||||
for (int i = 0; i < num; ++i) {
|
||||
docBuffer[i] >>= 1;
|
||||
}
|
||||
}
|
||||
}
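
A short worked example of the packing that readVIntBlock now undoes (the numbers are invented): the low bit of each group-varint value records whether the frequency is exactly 1.

  // packed value 15 = (7 << 1) | 1  -> doc delta 7, freq 1, nothing more to read
  // packed value 14 = (7 << 1) | 0  -> doc delta 7, freq follows as a separate vInt
  long packed = 15;
  long docDelta = packed >> 1;           // 7
  boolean freqIsOne = (packed & 1) != 0; // true, so no extra vInt for this entry
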
|
||||
|
@ -471,7 +475,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
blockUpto++;
|
||||
} else {
|
||||
// Read vInts:
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
blockUpto += left;
|
||||
|
@ -764,7 +768,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
docBuffer[1] = NO_MORE_DOCS;
|
||||
blockUpto++;
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
blockUpto += left;
|
||||
|
@ -1073,8 +1077,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
private int nextSkipDoc = -1;
|
||||
|
||||
private long seekTo = -1;
|
||||
|
||||
// as we read freqBuffer lazily, isFreqsRead shows if freqBuffer are read for the current block
|
||||
// always true when we don't have freqBuffer (indexHasFreq=false) or don't need freqBuffer
|
||||
// (needsFreq=false)
|
||||
|
@ -1153,7 +1155,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
blockUpto += BLOCK_SIZE;
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs);
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreqs, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
blockUpto += left;
|
||||
|
@ -1178,7 +1180,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
// Force to read next block
|
||||
docBufferUpto = BLOCK_SIZE;
|
||||
accum = skipper.getDoc();
|
||||
seekTo = skipper.getDocPointer(); // delay the seek
|
||||
docIn.seek(skipper.getDocPointer());
|
||||
isFreqsRead = true;
|
||||
}
|
||||
// next time we call advance, this is used to
|
||||
// foresee whether skipper is necessary.
|
||||
|
@ -1198,11 +1201,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
if (seekTo >= 0) {
|
||||
docIn.seek(seekTo);
|
||||
isFreqsRead = true; // reset isFreqsRead
|
||||
seekTo = -1;
|
||||
}
|
||||
refillDocs();
|
||||
}
|
||||
return this.doc = (int) docBuffer[docBufferUpto++];
|
||||
|
@ -1214,11 +1212,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
advanceShallow(target);
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
if (seekTo >= 0) {
|
||||
docIn.seek(seekTo);
|
||||
isFreqsRead = true; // reset isFreqsRead
|
||||
seekTo = -1;
|
||||
}
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
|
@ -1307,8 +1300,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
|
||||
private int nextSkipDoc = -1;
|
||||
|
||||
private long seekTo = -1;
|
||||
|
||||
public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
|
||||
throws IOException {
|
||||
indexHasOffsets =
|
||||
|
@ -1372,7 +1363,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
forDeltaUtil.decodeAndPrefixSum(docIn, accum, docBuffer);
|
||||
pforUtil.decode(docIn, freqBuffer);
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true);
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, true, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
}
|
||||
|
@ -1426,7 +1417,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
accum = skipper.getDoc();
|
||||
posPendingFP = skipper.getPosPointer();
|
||||
posPendingCount = skipper.getPosBufferUpto();
|
||||
seekTo = skipper.getDocPointer(); // delay the seek
|
||||
docIn.seek(skipper.getDocPointer());
|
||||
}
|
||||
// next time we call advance, this is used to
|
||||
// foresee whether skipper is necessary.
|
||||
|
@ -1452,10 +1443,6 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
advanceShallow(target);
|
||||
}
|
||||
if (docBufferUpto == BLOCK_SIZE) {
|
||||
if (seekTo >= 0) {
|
||||
docIn.seek(seekTo);
|
||||
seekTo = -1;
|
||||
}
|
||||
refillDocs();
|
||||
}
|
||||
|
||||
|
@ -1766,7 +1753,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
|
|||
false; // freq block will be loaded lazily when necessary, we don't load it here
|
||||
}
|
||||
} else {
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq);
|
||||
readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
|
||||
prefixSum(docBuffer, left, accum);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
}
|
||||
|
|
|
@ -92,6 +92,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
|
|||
private final PForUtil pforUtil;
|
||||
private final ForDeltaUtil forDeltaUtil;
|
||||
private final Lucene99SkipWriter skipWriter;
|
||||
private final GroupVIntWriter docGroupVIntWriter;
|
||||
|
||||
private boolean fieldHasNorms;
|
||||
private NumericDocValues norms;
|
||||
|
@ -172,6 +173,7 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
|
|||
skipWriter =
|
||||
new Lucene99SkipWriter(
|
||||
MAX_SKIP_LEVELS, BLOCK_SIZE, state.segmentInfo.maxDoc(), docOut, posOut, payOut);
|
||||
docGroupVIntWriter = new GroupVIntWriter();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -370,17 +372,19 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
|
|||
singletonDocID = (int) docDeltaBuffer[0];
|
||||
} else {
|
||||
singletonDocID = -1;
|
||||
// vInt encode the remaining doc deltas and freqs:
|
||||
for (int i = 0; i < docBufferUpto; i++) {
|
||||
final int docDelta = (int) docDeltaBuffer[i];
|
||||
final int freq = (int) freqBuffer[i];
|
||||
if (!writeFreqs) {
|
||||
docOut.writeVInt(docDelta);
|
||||
} else if (freq == 1) {
|
||||
docOut.writeVInt((docDelta << 1) | 1);
|
||||
} else {
|
||||
docOut.writeVInt(docDelta << 1);
|
||||
docOut.writeVInt(freq);
|
||||
// Group vInt encode the remaining doc deltas and freqs:
|
||||
if (writeFreqs) {
|
||||
for (int i = 0; i < docBufferUpto; i++) {
|
||||
docDeltaBuffer[i] = (docDeltaBuffer[i] << 1) | (freqBuffer[i] == 1 ? 1 : 0);
|
||||
}
|
||||
}
|
||||
docGroupVIntWriter.writeValues(docOut, docDeltaBuffer, docBufferUpto);
|
||||
if (writeFreqs) {
|
||||
for (int i = 0; i < docBufferUpto; i++) {
|
||||
final int freq = (int) freqBuffer[i];
|
||||
if (freq != 1) {
|
||||
docOut.writeVInt(freq);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
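
A minimal sketch of the tail-block packing introduced above, on invented buffers: frequencies equal to 1 are folded into the low bit of the doc delta before the group-varint write, and only the remaining frequencies are appended as explicit vInts afterwards.

  long[] docDeltaBuffer = {3, 7, 2};
  long[] freqBuffer = {1, 3, 1};
  for (int i = 0; i < docDeltaBuffer.length; i++) {
    // low bit set == "freq is exactly 1, nothing more to write for this doc"
    docDeltaBuffer[i] = (docDeltaBuffer[i] << 1) | (freqBuffer[i] == 1 ? 1 : 0);
  }
  // docDeltaBuffer is now {7, 14, 5}; after the group-varint block only the
  // frequency 3 (for the second doc) is written as a separate vInt.
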
@ -43,17 +43,17 @@ public final class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsForma
|
|||
|
||||
private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat();
|
||||
|
||||
/** The minimum quantile */
|
||||
private static final float MINIMUM_QUANTILE = 0.9f;
|
||||
/** The minimum confidence interval */
|
||||
private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f;
|
||||
|
||||
/** The maximum quantile */
|
||||
private static final float MAXIMUM_QUANTILE = 1f;
|
||||
/** The maximum confidence interval */
|
||||
private static final float MAXIMUM_CONFIDENCE_INTERVAL = 1f;
|
||||
|
||||
/**
|
||||
* Controls the quantile used to scalar quantize the vectors the default quantile is calculated as
|
||||
* `1-1/(vector_dimensions + 1)`
|
||||
* Controls the confidence interval used to scalar quantize the vectors the default value is
|
||||
* calculated as `1-1/(vector_dimensions + 1)`
|
||||
*/
|
||||
final Float quantile;
|
||||
final Float confidenceInterval;
|
||||
|
||||
/** Constructs a format using default graph construction parameters */
|
||||
public Lucene99ScalarQuantizedVectorsFormat() {
|
||||
|
@ -63,24 +63,26 @@ public final class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsForma
|
|||
/**
|
||||
* Constructs a format using the given graph construction parameters.
|
||||
*
|
||||
* @param quantile the quantile for scalar quantizing the vectors, when `null` it is calculated
|
||||
* based on the vector field dimensions.
|
||||
* @param confidenceInterval the confidenceInterval for scalar quantizing the vectors, when `null`
|
||||
* it is calculated based on the vector field dimensions.
|
||||
*/
|
||||
public Lucene99ScalarQuantizedVectorsFormat(Float quantile) {
|
||||
if (quantile != null && (quantile < MINIMUM_QUANTILE || quantile > MAXIMUM_QUANTILE)) {
|
||||
public Lucene99ScalarQuantizedVectorsFormat(Float confidenceInterval) {
|
||||
if (confidenceInterval != null
|
||||
&& (confidenceInterval < MINIMUM_CONFIDENCE_INTERVAL
|
||||
|| confidenceInterval > MAXIMUM_CONFIDENCE_INTERVAL)) {
|
||||
throw new IllegalArgumentException(
|
||||
"quantile must be between "
|
||||
+ MINIMUM_QUANTILE
|
||||
"confidenceInterval must be between "
|
||||
+ MINIMUM_CONFIDENCE_INTERVAL
|
||||
+ " and "
|
||||
+ MAXIMUM_QUANTILE
|
||||
+ "; quantile="
|
||||
+ quantile);
|
||||
+ MAXIMUM_CONFIDENCE_INTERVAL
|
||||
+ "; confidenceInterval="
|
||||
+ confidenceInterval);
|
||||
}
|
||||
this.quantile = quantile;
|
||||
this.confidenceInterval = confidenceInterval;
|
||||
}
|
||||
|
||||
static float calculateDefaultQuantile(int vectorDimension) {
|
||||
return Math.max(MINIMUM_QUANTILE, 1f - (1f / (vectorDimension + 1)));
|
||||
static float calculateDefaultConfidenceInterval(int vectorDimension) {
|
||||
return Math.max(MINIMUM_CONFIDENCE_INTERVAL, 1f - (1f / (vectorDimension + 1)));
|
||||
}
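
Two worked values for calculateDefaultConfidenceInterval above, since the 0.9 floor only matters for very small vectors:

  // dimension 4:   1 - 1/5   = 0.8    -> clamped up to the 0.9 minimum
  // dimension 768: 1 - 1/769 ~ 0.9987 -> used as-is
  float small = Math.max(0.9f, 1f - (1f / (4 + 1)));    // 0.9f
  float large = Math.max(0.9f, 1f - (1f / (768 + 1)));  // ~0.9987f
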
|
||||
|
||||
@Override
|
||||
|
@ -88,8 +90,8 @@ public final class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsForma
|
|||
return NAME
|
||||
+ "(name="
|
||||
+ NAME
|
||||
+ ", quantile="
|
||||
+ quantile
|
||||
+ ", confidenceInterval="
|
||||
+ confidenceInterval
|
||||
+ ", rawVectorFormat="
|
||||
+ rawVectorFormat
|
||||
+ ")";
|
||||
|
@ -98,7 +100,7 @@ public final class Lucene99ScalarQuantizedVectorsFormat extends FlatVectorsForma
|
|||
@Override
|
||||
public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
|
||||
return new Lucene99ScalarQuantizedVectorsWriter(
|
||||
state, quantile, rawVectorFormat.fieldsWriter(state));
|
||||
state, confidenceInterval, rawVectorFormat.fieldsWriter(state));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -58,6 +58,7 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
|
||||
Lucene99ScalarQuantizedVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader)
|
||||
throws IOException {
|
||||
this.rawVectorsReader = rawVectorsReader;
|
||||
int versionMeta = -1;
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
|
@ -80,19 +81,8 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
} catch (Throwable exception) {
|
||||
priorE = exception;
|
||||
} finally {
|
||||
try {
|
||||
CodecUtil.checkFooter(meta, priorE);
|
||||
success = true;
|
||||
} finally {
|
||||
if (success == false) {
|
||||
IOUtils.close(rawVectorsReader);
|
||||
}
|
||||
}
|
||||
CodecUtil.checkFooter(meta, priorE);
|
||||
}
|
||||
}
|
||||
success = false;
|
||||
this.rawVectorsReader = rawVectorsReader;
|
||||
try {
|
||||
quantizedVectorData =
|
||||
openDataInput(
|
||||
state,
|
||||
|
@ -313,10 +303,10 @@ public final class Lucene99ScalarQuantizedVectorsReader extends FlatVectorsReade
|
|||
dimension = input.readVInt();
|
||||
size = input.readInt();
|
||||
if (size > 0) {
|
||||
float configuredQuantile = Float.intBitsToFloat(input.readInt());
|
||||
float confidenceInterval = Float.intBitsToFloat(input.readInt());
|
||||
float minQuantile = Float.intBitsToFloat(input.readInt());
|
||||
float maxQuantile = Float.intBitsToFloat(input.readInt());
|
||||
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, configuredQuantile);
|
||||
scalarQuantizer = new ScalarQuantizer(minQuantile, maxQuantile, confidenceInterval);
|
||||
} else {
|
||||
scalarQuantizer = null;
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.codecs.lucene99;
|
|||
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.QUANTIZED_VECTOR_COMPONENT;
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultQuantile;
|
||||
import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.calculateDefaultConfidenceInterval;
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
import static org.apache.lucene.util.RamUsageEstimator.shallowSizeOfInstance;
|
||||
|
||||
|
@ -91,14 +91,14 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
|
||||
private final List<FieldWriter> fields = new ArrayList<>();
|
||||
private final IndexOutput meta, quantizedVectorData;
|
||||
private final Float quantile;
|
||||
private final Float confidenceInterval;
|
||||
private final FlatVectorsWriter rawVectorDelegate;
|
||||
private boolean finished;
|
||||
|
||||
Lucene99ScalarQuantizedVectorsWriter(
|
||||
SegmentWriteState state, Float quantile, FlatVectorsWriter rawVectorDelegate)
|
||||
SegmentWriteState state, Float confidenceInterval, FlatVectorsWriter rawVectorDelegate)
|
||||
throws IOException {
|
||||
this.quantile = quantile;
|
||||
this.confidenceInterval = confidenceInterval;
|
||||
segmentWriteState = state;
|
||||
String metaFileName =
|
||||
IndexFileNames.segmentFileName(
|
||||
|
@ -142,12 +142,12 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
public FlatFieldVectorsWriter<?> addField(
|
||||
FieldInfo fieldInfo, KnnFieldVectorsWriter<?> indexWriter) throws IOException {
|
||||
if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32)) {
|
||||
float quantile =
|
||||
this.quantile == null
|
||||
? calculateDefaultQuantile(fieldInfo.getVectorDimension())
|
||||
: this.quantile;
|
||||
float confidenceInterval =
|
||||
this.confidenceInterval == null
|
||||
? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
|
||||
: this.confidenceInterval;
|
||||
FieldWriter quantizedWriter =
|
||||
new FieldWriter(quantile, fieldInfo, segmentWriteState.infoStream, indexWriter);
|
||||
new FieldWriter(confidenceInterval, fieldInfo, segmentWriteState.infoStream, indexWriter);
|
||||
fields.add(quantizedWriter);
|
||||
indexWriter = quantizedWriter;
|
||||
}
|
||||
|
@ -169,16 +169,16 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
DocsWithFieldSet docsWithField =
|
||||
writeQuantizedVectorData(quantizedVectorData, byteVectorValues);
|
||||
long vectorDataLength = quantizedVectorData.getFilePointer() - vectorDataOffset;
|
||||
float quantile =
|
||||
this.quantile == null
|
||||
? calculateDefaultQuantile(fieldInfo.getVectorDimension())
|
||||
: this.quantile;
|
||||
float confidenceInterval =
|
||||
this.confidenceInterval == null
|
||||
? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
|
||||
: this.confidenceInterval;
|
||||
writeMeta(
|
||||
fieldInfo,
|
||||
segmentWriteState.segmentInfo.maxDoc(),
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
quantile,
|
||||
confidenceInterval,
|
||||
mergedQuantizationState.getLowerQuantile(),
|
||||
mergedQuantizationState.getUpperQuantile(),
|
||||
docsWithField);
|
||||
|
@ -251,7 +251,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
maxDoc,
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
quantile,
|
||||
confidenceInterval,
|
||||
fieldData.minQuantile,
|
||||
fieldData.maxQuantile,
|
||||
fieldData.docsWithField);
|
||||
|
@ -262,7 +262,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
int maxDoc,
|
||||
long vectorDataOffset,
|
||||
long vectorDataLength,
|
||||
Float configuredQuantizationQuantile,
|
||||
Float confidenceInterval,
|
||||
Float lowerQuantile,
|
||||
Float upperQuantile,
|
||||
DocsWithFieldSet docsWithField)
|
||||
|
@ -279,9 +279,9 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
assert Float.isFinite(lowerQuantile) && Float.isFinite(upperQuantile);
|
||||
meta.writeInt(
|
||||
Float.floatToIntBits(
|
||||
configuredQuantizationQuantile != null
|
||||
? configuredQuantizationQuantile
|
||||
: calculateDefaultQuantile(field.getVectorDimension())));
|
||||
confidenceInterval != null
|
||||
? confidenceInterval
|
||||
: calculateDefaultConfidenceInterval(field.getVectorDimension())));
|
||||
meta.writeInt(Float.floatToIntBits(lowerQuantile));
|
||||
meta.writeInt(Float.floatToIntBits(upperQuantile));
|
||||
}
|
||||
|
@ -344,7 +344,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
maxDoc,
|
||||
vectorDataOffset,
|
||||
quantizedVectorLength,
|
||||
quantile,
|
||||
confidenceInterval,
|
||||
fieldData.minQuantile,
|
||||
fieldData.maxQuantile,
|
||||
newDocsWithField);
|
||||
|
@ -374,11 +374,11 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
private ScalarQuantizer mergeQuantiles(FieldInfo fieldInfo, MergeState mergeState)
|
||||
throws IOException {
|
||||
assert fieldInfo.getVectorEncoding() == VectorEncoding.FLOAT32;
|
||||
float quantile =
|
||||
this.quantile == null
|
||||
? calculateDefaultQuantile(fieldInfo.getVectorDimension())
|
||||
: this.quantile;
|
||||
return mergeAndRecalculateQuantiles(mergeState, fieldInfo, quantile);
|
||||
float confidenceInterval =
|
||||
this.confidenceInterval == null
|
||||
? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
|
||||
: this.confidenceInterval;
|
||||
return mergeAndRecalculateQuantiles(mergeState, fieldInfo, confidenceInterval);
|
||||
}
|
||||
|
||||
private ScalarQuantizedCloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
|
||||
|
@ -408,16 +408,16 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
quantizationDataInput, quantizationDataInput.length() - CodecUtil.footerLength());
|
||||
long vectorDataLength = quantizedVectorData.getFilePointer() - vectorDataOffset;
|
||||
CodecUtil.retrieveChecksum(quantizationDataInput);
|
||||
float quantile =
|
||||
this.quantile == null
|
||||
? calculateDefaultQuantile(fieldInfo.getVectorDimension())
|
||||
: this.quantile;
|
||||
float confidenceInterval =
|
||||
this.confidenceInterval == null
|
||||
? calculateDefaultConfidenceInterval(fieldInfo.getVectorDimension())
|
||||
: this.confidenceInterval;
|
||||
writeMeta(
|
||||
fieldInfo,
|
||||
segmentWriteState.segmentInfo.maxDoc(),
|
||||
vectorDataOffset,
|
||||
vectorDataLength,
|
||||
quantile,
|
||||
confidenceInterval,
|
||||
mergedQuantizationState.getLowerQuantile(),
|
||||
mergedQuantizationState.getUpperQuantile(),
|
||||
docsWithField);
|
||||
|
@ -446,7 +446,9 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
}
|
||||
|
||||
static ScalarQuantizer mergeQuantiles(
|
||||
List<ScalarQuantizer> quantizationStates, List<Integer> segmentSizes, float quantile) {
|
||||
List<ScalarQuantizer> quantizationStates,
|
||||
List<Integer> segmentSizes,
|
||||
float confidenceInterval) {
|
||||
assert quantizationStates.size() == segmentSizes.size();
|
||||
if (quantizationStates.isEmpty()) {
|
||||
return null;
|
||||
|
@ -464,7 +466,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
}
|
||||
lowerQuantile /= totalCount;
|
||||
upperQuantile /= totalCount;
|
||||
return new ScalarQuantizer(lowerQuantile, upperQuantile, quantile);
|
||||
return new ScalarQuantizer(lowerQuantile, upperQuantile, confidenceInterval);
|
||||
}
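
A worked example of the merge above, assuming (as the division by totalCount suggests) that each segment's lower and upper quantiles are accumulated weighted by its vector count; the numbers are invented.

  // segment A: 1000 vectors, lower -0.50, upper 0.80
  // segment B: 3000 vectors, lower -0.30, upper 0.60
  // merged lower = (1000 * -0.50 + 3000 * -0.30) / 4000 = -0.35
  // merged upper = (1000 *  0.80 + 3000 *  0.60) / 4000 =  0.65
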
|
||||
|
||||
/**
|
||||
|
@ -521,7 +523,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
}
|
||||
|
||||
static ScalarQuantizer mergeAndRecalculateQuantiles(
|
||||
MergeState mergeState, FieldInfo fieldInfo, float quantile) throws IOException {
|
||||
MergeState mergeState, FieldInfo fieldInfo, float confidenceInterval) throws IOException {
|
||||
List<ScalarQuantizer> quantizationStates = new ArrayList<>(mergeState.liveDocs.length);
|
||||
List<Integer> segmentSizes = new ArrayList<>(mergeState.liveDocs.length);
|
||||
for (int i = 0; i < mergeState.liveDocs.length; i++) {
|
||||
|
@ -536,7 +538,8 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
segmentSizes.add(fvv.size());
|
||||
}
|
||||
}
|
||||
ScalarQuantizer mergedQuantiles = mergeQuantiles(quantizationStates, segmentSizes, quantile);
|
||||
ScalarQuantizer mergedQuantiles =
|
||||
mergeQuantiles(quantizationStates, segmentSizes, confidenceInterval);
|
||||
// Segments no providing quantization state indicates that their quantiles were never
|
||||
// calculated.
|
||||
// To be safe, we should always recalculate given a sample set over all the float vectors in the
|
||||
|
@ -545,7 +548,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
if (mergedQuantiles == null || shouldRecomputeQuantiles(mergedQuantiles, quantizationStates)) {
|
||||
FloatVectorValues vectorValues =
|
||||
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
|
||||
mergedQuantiles = ScalarQuantizer.fromVectors(vectorValues, quantile);
|
||||
mergedQuantiles = ScalarQuantizer.fromVectors(vectorValues, confidenceInterval);
|
||||
}
|
||||
return mergedQuantiles;
|
||||
}
|
||||
|
@ -599,7 +602,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
private static final long SHALLOW_SIZE = shallowSizeOfInstance(FieldWriter.class);
|
||||
private final List<float[]> floatVectors;
|
||||
private final FieldInfo fieldInfo;
|
||||
private final float quantile;
|
||||
private final float confidenceInterval;
|
||||
private final InfoStream infoStream;
|
||||
private final boolean normalize;
|
||||
private float minQuantile = Float.POSITIVE_INFINITY;
|
||||
|
@ -609,12 +612,12 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
|
||||
@SuppressWarnings("unchecked")
|
||||
FieldWriter(
|
||||
float quantile,
|
||||
float confidenceInterval,
|
||||
FieldInfo fieldInfo,
|
||||
InfoStream infoStream,
|
||||
KnnFieldVectorsWriter<?> indexWriter) {
|
||||
super((KnnFieldVectorsWriter<float[]>) indexWriter);
|
||||
this.quantile = quantile;
|
||||
this.confidenceInterval = confidenceInterval;
|
||||
this.fieldInfo = fieldInfo;
|
||||
this.normalize = fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE;
|
||||
this.floatVectors = new ArrayList<>();
|
||||
|
@ -635,15 +638,15 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
new FloatVectorWrapper(
|
||||
floatVectors,
|
||||
fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE),
|
||||
quantile);
|
||||
confidenceInterval);
|
||||
minQuantile = quantizer.getLowerQuantile();
|
||||
maxQuantile = quantizer.getUpperQuantile();
|
||||
if (infoStream.isEnabled(QUANTIZED_VECTOR_COMPONENT)) {
|
||||
infoStream.message(
|
||||
QUANTIZED_VECTOR_COMPONENT,
|
||||
"quantized field="
|
||||
+ " quantile="
|
||||
+ quantile
|
||||
+ " confidenceInterval="
|
||||
+ confidenceInterval
|
||||
+ " minQuantile="
|
||||
+ minQuantile
|
||||
+ " maxQuantile="
|
||||
|
@ -654,7 +657,7 @@ public final class Lucene99ScalarQuantizedVectorsWriter extends FlatVectorsWrite
|
|||
|
||||
ScalarQuantizer createQuantizer() {
|
||||
assert finished;
|
||||
return new ScalarQuantizer(minQuantile, maxQuantile, quantile);
|
||||
return new ScalarQuantizer(minQuantile, maxQuantile, confidenceInterval);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -119,7 +119,6 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
static String getSuffix(String formatName, String suffix) {
|
||||
return formatName + "_" + suffix;
|
||||
|
|
|
@ -272,7 +272,6 @@ public final class FeatureField extends Field {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
static final class LogFunction extends FeatureFunction {
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
|
@ -171,7 +172,7 @@ public class KeywordField extends Field {
|
|||
* @throws NullPointerException if {@code field} is null.
|
||||
* @return a query matching documents with this exact value
|
||||
*/
|
||||
public static Query newSetQuery(String field, BytesRef... values) {
|
||||
public static Query newSetQuery(String field, Collection<BytesRef> values) {
|
||||
Objects.requireNonNull(field, "field must not be null");
|
||||
Objects.requireNonNull(values, "values must not be null");
|
||||
Query indexQuery = new TermInSetQuery(field, values);
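
A usage sketch for the Collection-based overload above; the field name and values are illustrative, and the usual org.apache.lucene.util.BytesRef and java.util.List imports are assumed.

  List<BytesRef> colors = List.of(new BytesRef("red"), new BytesRef("green"), new BytesRef("blue"));
  Query query = KeywordField.newSetQuery("color", colors);
  // Callers that previously built a BytesRef[] only to satisfy the varargs
  // signature can now pass their collection directly.
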
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import java.util.Collection;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.search.IndexOrDocValuesQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
|
@ -99,7 +100,7 @@ public class SortedDocValuesField extends Field {
|
|||
* in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on postings, such as
|
||||
* {@link TermInSetQuery}.
|
||||
*/
|
||||
public static Query newSlowSetQuery(String field, BytesRef... values) {
|
||||
public static Query newSlowSetQuery(String field, Collection<BytesRef> values) {
|
||||
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, values);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.document;
|
||||
|
||||
import java.util.Collection;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.search.IndexOrDocValuesQuery;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
|
@ -103,7 +104,7 @@ public class SortedSetDocValuesField extends Field {
|
|||
* in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on postings, such as
|
||||
* {@link TermInSetQuery}.
|
||||
*/
|
||||
public static Query newSlowSetQuery(String field, BytesRef... values) {
|
||||
public static Query newSlowSetQuery(String field, Collection<BytesRef> values) {
|
||||
return new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, field, values);
|
||||
}
|
||||
}
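
A combined usage sketch for the doc-values overloads above, following the javadoc's suggestion to pair them with a postings-based TermInSetQuery inside an IndexOrDocValuesQuery; the field name and terms are invented.

  Collection<BytesRef> brands = List.of(new BytesRef("acme"), new BytesRef("globex"));
  Query onPostings = new TermInSetQuery("brand", brands);
  Query onDocValues = SortedSetDocValuesField.newSlowSetQuery("brand", brands);
  // IndexOrDocValuesQuery lets the planner pick whichever side is cheaper at search time.
  Query query = new IndexOrDocValuesQuery(onPostings, onDocValues);
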
|
||||
|
|
|
@ -694,7 +694,7 @@ abstract class SpatialQuery extends Query {
|
|||
final SpatialVisitor spatialVisitor, QueryRelation queryRelation, final FixedBitSet result) {
|
||||
final BiFunction<byte[], byte[], Relation> innerFunction =
|
||||
spatialVisitor.getInnerFunction(queryRelation);
|
||||
;
|
||||
|
||||
return new IntersectVisitor() {
|
||||
|
||||
@Override
|
||||
|
|
|
@ -1254,8 +1254,7 @@ public final class Tessellator {
|
|||
++numMerges;
|
||||
// step 'insize' places along from p
|
||||
q = p;
|
||||
for (i = 0, pSize = 0; i < inSize && q != null; ++i, ++pSize, q = q.nextZ)
|
||||
;
|
||||
for (i = 0, pSize = 0; i < inSize && q != null; ++i, ++pSize, q = q.nextZ) {}
|
||||
// if q hasn't fallen off end, we have two lists to merge
|
||||
qSize = inSize;
|
||||
|
||||
|
|
|
@ -22,11 +22,11 @@ import org.apache.lucene.store.DataOutput;
|
|||
import org.apache.lucene.util.BitUtil;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
|
||||
/* IndexInput that knows how to read the byte slices written
|
||||
* by Posting and PostingVector. We read the bytes in
|
||||
* each slice until we hit the end of that slice at which
|
||||
* point we read the forwarding address of the next slice
|
||||
* and then jump to it.*/
|
||||
/**
|
||||
* IndexInput that knows how to read the byte slices written by Posting and PostingVector. We read
|
||||
* the bytes in each slice until we hit the end of that slice at which point we read the forwarding
|
||||
* address of the next slice and then jump to it.
|
||||
*/
|
||||
final class ByteSliceReader extends DataInput {
|
||||
ByteBlockPool pool;
|
||||
int bufferUpto;
|
||||
|
|
|
@ -28,7 +28,7 @@ import java.nio.file.Paths;
|
|||
import java.text.NumberFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
@ -96,11 +96,11 @@ import org.apache.lucene.util.Version;
|
|||
*/
|
||||
public final class CheckIndex implements Closeable {
|
||||
|
||||
private final Directory dir;
|
||||
private final Lock writeLock;
|
||||
private final NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
|
||||
private PrintStream infoStream;
|
||||
private Directory dir;
|
||||
private Lock writeLock;
|
||||
private volatile boolean closed;
|
||||
private NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
|
||||
|
||||
/**
|
||||
* Returned from {@link #checkIndex()} detailing the health and status of the index.
|
||||
|
@ -441,19 +441,20 @@ public final class CheckIndex implements Closeable {
|
|||
IOUtils.close(writeLock);
|
||||
}
|
||||
|
||||
private boolean doSlowChecks;
|
||||
private int level;
|
||||
|
||||
/**
|
||||
* If true, additional slow checks are performed. This will likely drastically increase time it
|
||||
* takes to run CheckIndex!
|
||||
* Sets Level, the higher the value, the more additional checks are performed. This will likely
|
||||
* drastically increase time it takes to run CheckIndex! See {@link Level}
|
||||
*/
|
||||
public void setDoSlowChecks(boolean v) {
|
||||
doSlowChecks = v;
|
||||
public void setLevel(int v) {
|
||||
Level.checkIfLevelInBounds(v);
|
||||
level = v;
|
||||
}
|
||||
|
||||
/** See {@link #setDoSlowChecks}. */
|
||||
public boolean doSlowChecks() {
|
||||
return doSlowChecks;
|
||||
/** See {@link #setLevel}. */
|
||||
public int getLevel() {
|
||||
return level;
|
||||
}
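
A usage sketch for the new level-based API above. It assumes the CheckIndex(Directory) constructor and the public Level constants referenced later in this change; treat the exact names as illustrative rather than authoritative.

  try (CheckIndex checker = new CheckIndex(dir)) {
    checker.setInfoStream(System.out);
    // opt in to the expensive checks that used to be enabled via setDoSlowChecks(true)
    checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_SLOW_CHECKS);
    CheckIndex.Status status = checker.checkIndex();
    if (status.clean == false) {
      // a repair tool could act on the broken segments reported in status here
    }
  }
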
|
||||
|
||||
private boolean failFast;
|
||||
|
@ -473,21 +474,6 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
private boolean verbose;
|
||||
|
||||
/** See {@link #getChecksumsOnly}. */
|
||||
public boolean getChecksumsOnly() {
|
||||
return checksumsOnly;
|
||||
}
|
||||
|
||||
/**
|
||||
* If true, only validate physical integrity for all files. Note that the returned nested status
|
||||
* objects (e.g. storedFieldStatus) will be null.
|
||||
*/
|
||||
public void setChecksumsOnly(boolean v) {
|
||||
checksumsOnly = v;
|
||||
}
|
||||
|
||||
private boolean checksumsOnly;
|
||||
|
||||
/** Set threadCount used for parallelizing index integrity checking. */
|
||||
public void setThreadCount(int tc) {
|
||||
if (tc <= 0) {
|
||||
|
@ -586,7 +572,6 @@ public final class CheckIndex implements Closeable {
|
|||
ensureOpen();
|
||||
long startNS = System.nanoTime();
|
||||
|
||||
SegmentInfos sis = null;
|
||||
Status result = new Status();
|
||||
result.dir = dir;
|
||||
String[] files = dir.listAll();
|
||||
|
@ -595,43 +580,115 @@ public final class CheckIndex implements Closeable {
|
|||
throw new IndexNotFoundException(
|
||||
"no segments* file found in " + dir + ": files: " + Arrays.toString(files));
|
||||
}
|
||||
try {
|
||||
// Do not use SegmentInfos.read(Directory) since the spooky
|
||||
// retrying it does is not necessary here (we hold the write lock):
|
||||
sis =
|
||||
SegmentInfos.readCommit(
|
||||
dir, lastSegmentsFile, 0 /* always open old indices if codecs are around */);
|
||||
} catch (Throwable t) {
|
||||
if (failFast) {
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
|
||||
// https://github.com/apache/lucene/issues/7820: also attempt to open any older commit
|
||||
// points (segments_N), which will catch certain corruption like missing _N.si files
|
||||
// for segments not also referenced by the newest commit point (which was already
|
||||
// loaded, successfully, above). Note that we do not do a deeper check of segments
|
||||
// referenced ONLY by these older commit points, because such corruption would not
|
||||
// prevent a new IndexWriter from opening on the newest commit point. but it is still
|
||||
// corruption, e.g. a reader opened on those old commit points can hit corruption
|
||||
// exceptions which we (still) will not detect here. progress not perfection!
|
||||
|
||||
SegmentInfos lastCommit = null;
|
||||
|
||||
List<String> allSegmentsFiles = new ArrayList<>();
|
||||
for (String fileName : files) {
|
||||
if (fileName.startsWith(IndexFileNames.SEGMENTS)
|
||||
&& fileName.equals(SegmentInfos.OLD_SEGMENTS_GEN) == false) {
|
||||
allSegmentsFiles.add(fileName);
|
||||
}
|
||||
}
|
||||
|
||||
// Sort descending by generation so that we always attempt to read the last commit first. This
|
||||
// way if an index has a broken last commit AND a broken old commit, we report the last commit
|
||||
// error first:
|
||||
allSegmentsFiles.sort(
|
||||
new Comparator<String>() {
|
||||
@Override
|
||||
public int compare(String a, String b) {
|
||||
long genA = SegmentInfos.generationFromSegmentsFileName(a);
|
||||
long genB = SegmentInfos.generationFromSegmentsFileName(b);
|
||||
|
||||
// reversed natural sort (largest generation first):
|
||||
return -Long.compare(genA, genB);
|
||||
}
|
||||
});
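
The anonymous Comparator above orders segments files newest generation first; the same ordering could be written with a method reference, shown here purely as a reading aid.

  allSegmentsFiles.sort(
      Comparator.comparingLong(SegmentInfos::generationFromSegmentsFileName).reversed());
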
|
||||
|
||||
for (String fileName : allSegmentsFiles) {
|
||||
|
||||
boolean isLastCommit = fileName.equals(lastSegmentsFile);
|
||||
|
||||
SegmentInfos infos;
|
||||
|
||||
try {
|
||||
// Do not use SegmentInfos.read(Directory) since the spooky
|
||||
// retrying it does is not necessary here (we hold the write lock):
|
||||
// always open old indices if codecs are around
|
||||
infos = SegmentInfos.readCommit(dir, fileName, 0);
|
||||
} catch (Throwable t) {
|
||||
if (failFast) {
|
||||
throw IOUtils.rethrowAlways(t);
|
||||
}
|
||||
|
||||
String message;
|
||||
|
||||
if (isLastCommit) {
|
||||
message =
|
||||
"ERROR: could not read latest commit point from segments file \""
|
||||
+ fileName
|
||||
+ "\" in directory";
|
||||
} else {
|
||||
message =
|
||||
"ERROR: could not read old (not latest) commit point segments file \""
|
||||
+ fileName
|
||||
+ "\" in directory";
|
||||
}
|
||||
msg(infoStream, message);
|
||||
result.missingSegments = true;
|
||||
if (infoStream != null) {
|
||||
t.printStackTrace(infoStream);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
if (isLastCommit) {
|
||||
// record the latest commit point: we will deeply check all segments referenced by it
|
||||
lastCommit = infos;
|
||||
}
|
||||
}
|
||||
|
||||
    // we know there is a lastSegmentsFileName, so we must've attempted to load it in the above
    // for loop. if it failed to load, we threw the exception (failFast == true) or we returned
    // the failure (failFast == false). so if we get here, we should always have a valid lastCommit:
    assert lastCommit != null;
|
||||
|
||||
if (lastCommit == null) {
|
||||
msg(infoStream, "ERROR: could not read any segments file in directory");
|
||||
result.missingSegments = true;
|
||||
if (infoStream != null) t.printStackTrace(infoStream);
|
||||
return result;
|
||||
}
|
||||
|
||||
if (infoStream != null) {
|
||||
int maxDoc = 0;
|
||||
int delCount = 0;
|
||||
for (SegmentCommitInfo info : sis) {
|
||||
for (SegmentCommitInfo info : lastCommit) {
|
||||
maxDoc += info.info.maxDoc();
|
||||
delCount += info.getDelCount();
|
||||
}
|
||||
infoStream.println(
|
||||
String.format(
|
||||
Locale.ROOT,
|
||||
"%.2f%% total deletions; %d documents; %d deletions",
|
||||
100. * delCount / maxDoc,
|
||||
maxDoc,
|
||||
delCount));
|
||||
infoStream.printf(
|
||||
Locale.ROOT,
|
||||
"%.2f%% total deletions; %d documents; %d deletions%n",
|
||||
100. * delCount / maxDoc,
|
||||
maxDoc,
|
||||
delCount);
|
||||
}
|
||||
|
||||
// find the oldest and newest segment versions
|
||||
Version oldest = null;
|
||||
Version newest = null;
|
||||
String oldSegs = null;
|
||||
for (SegmentCommitInfo si : sis) {
|
||||
for (SegmentCommitInfo si : lastCommit) {
|
||||
Version version = si.info.getVersion();
|
||||
if (version == null) {
|
||||
// pre-3.1 segment
|
||||
|
@ -646,14 +703,14 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
final int numSegments = sis.size();
|
||||
final String segmentsFileName = sis.getSegmentsFileName();
|
||||
final int numSegments = lastCommit.size();
|
||||
final String segmentsFileName = lastCommit.getSegmentsFileName();
|
||||
result.segmentsFileName = segmentsFileName;
|
||||
result.numSegments = numSegments;
|
||||
result.userData = sis.getUserData();
|
||||
result.userData = lastCommit.getUserData();
|
||||
String userDataString;
|
||||
if (sis.getUserData().size() > 0) {
|
||||
userDataString = " userData=" + sis.getUserData();
|
||||
if (lastCommit.getUserData().size() > 0) {
|
||||
userDataString = " userData=" + lastCommit.getUserData();
|
||||
} else {
|
||||
userDataString = "";
|
||||
}
|
||||
|
@ -681,7 +738,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ " "
|
||||
+ versionString
|
||||
+ " id="
|
||||
+ StringHelper.idToString(sis.getId())
|
||||
+ StringHelper.idToString(lastCommit.getId())
|
||||
+ userDataString);
|
||||
|
||||
if (onlySegments != null) {
|
||||
|
@ -696,14 +753,14 @@ public final class CheckIndex implements Closeable {
|
|||
msg(infoStream, ":");
|
||||
}
|
||||
|
||||
result.newSegments = sis.clone();
|
||||
result.newSegments = lastCommit.clone();
|
||||
result.newSegments.clear();
|
||||
result.maxSegmentName = -1;
|
||||
|
||||
// checks segments sequentially
|
||||
if (executorService == null) {
|
||||
for (int i = 0; i < numSegments; i++) {
|
||||
final SegmentCommitInfo info = sis.info(i);
|
||||
final SegmentCommitInfo info = lastCommit.info(i);
|
||||
updateMaxSegmentName(result, info);
|
||||
if (onlySegments != null && !onlySegments.contains(info.info.name)) {
|
||||
continue;
|
||||
|
@ -718,7 +775,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ info.info.name
|
||||
+ " maxDoc="
|
||||
+ info.info.maxDoc());
|
||||
Status.SegmentInfoStatus segmentInfoStatus = testSegment(sis, info, infoStream);
|
||||
Status.SegmentInfoStatus segmentInfoStatus = testSegment(lastCommit, info, infoStream);
|
||||
|
||||
processSegmentInfoStatusResult(result, info, segmentInfoStatus);
|
||||
}
|
||||
|
@ -729,14 +786,13 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
// checks segments concurrently
|
||||
List<SegmentCommitInfo> segmentCommitInfos = new ArrayList<>();
|
||||
for (SegmentCommitInfo sci : sis) {
|
||||
for (SegmentCommitInfo sci : lastCommit) {
|
||||
segmentCommitInfos.add(sci);
|
||||
}
|
||||
|
||||
// sort segmentCommitInfos by segment size, as smaller segment tends to finish faster, and
|
||||
// hence its output can be printed out faster
|
||||
Collections.sort(
|
||||
segmentCommitInfos,
|
||||
segmentCommitInfos.sort(
|
||||
(info1, info2) -> {
|
||||
try {
|
||||
return Long.compare(info1.sizeInBytes(), info2.sizeInBytes());
|
||||
|
@ -757,7 +813,7 @@ public final class CheckIndex implements Closeable {
|
|||
continue;
|
||||
}
|
||||
|
||||
SegmentInfos finalSis = sis;
|
||||
SegmentInfos finalSis = lastCommit;
|
||||
|
||||
ByteArrayOutputStream output = new ByteArrayOutputStream();
|
||||
PrintStream stream = new PrintStream(output, true, IOUtils.UTF_8);
|
||||
|
@ -813,7 +869,7 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
if (0 == result.numBadSegments) {
|
||||
result.clean = true;
|
||||
} else
|
||||
} else {
|
||||
msg(
|
||||
infoStream,
|
||||
"WARNING: "
|
||||
|
@ -821,14 +877,16 @@ public final class CheckIndex implements Closeable {
|
|||
+ " broken segments (containing "
|
||||
+ result.totLoseDocCount
|
||||
+ " documents) detected");
|
||||
}
|
||||
|
||||
if (!(result.validCounter = (result.maxSegmentName < sis.counter))) {
|
||||
result.validCounter = result.maxSegmentName < lastCommit.counter;
|
||||
if (result.validCounter == false) {
|
||||
result.clean = false;
|
||||
result.newSegments.counter = result.maxSegmentName + 1;
|
||||
msg(
|
||||
infoStream,
|
||||
"ERROR: Next segment name counter "
|
||||
+ sis.counter
|
||||
+ lastCommit.counter
|
||||
+ " is not greater than max segment name "
|
||||
+ result.maxSegmentName);
|
||||
}
|
||||
|
@ -921,7 +979,7 @@ public final class CheckIndex implements Closeable {
|
|||
msg(infoStream, " diagnostics = " + diagnostics);
|
||||
}
|
||||
|
||||
if (!info.hasDeletions()) {
|
||||
if (info.hasDeletions() == false) {
|
||||
msg(infoStream, " no deletions");
|
||||
segInfoStat.hasDeletions = false;
|
||||
} else {
|
||||
|
@ -960,26 +1018,26 @@ public final class CheckIndex implements Closeable {
|
|||
toLoseDocCount = numDocs;
|
||||
|
||||
if (reader.hasDeletions()) {
|
||||
if (reader.numDocs() != info.info.maxDoc() - info.getDelCount()) {
|
||||
if (numDocs != info.info.maxDoc() - info.getDelCount()) {
|
||||
throw new CheckIndexException(
|
||||
"delete count mismatch: info="
|
||||
+ (info.info.maxDoc() - info.getDelCount())
|
||||
+ " vs reader="
|
||||
+ reader.numDocs());
|
||||
+ numDocs);
|
||||
}
|
||||
if ((info.info.maxDoc() - reader.numDocs()) > reader.maxDoc()) {
|
||||
if ((info.info.maxDoc() - numDocs) > reader.maxDoc()) {
|
||||
throw new CheckIndexException(
|
||||
"too many deleted docs: maxDoc()="
|
||||
+ reader.maxDoc()
|
||||
+ " vs del count="
|
||||
+ (info.info.maxDoc() - reader.numDocs()));
|
||||
+ (info.info.maxDoc() - numDocs));
|
||||
}
|
||||
if (info.info.maxDoc() - reader.numDocs() != info.getDelCount()) {
|
||||
if (info.info.maxDoc() - numDocs != info.getDelCount()) {
|
||||
throw new CheckIndexException(
|
||||
"delete count mismatch: info="
|
||||
+ info.getDelCount()
|
||||
+ " vs reader="
|
||||
+ (info.info.maxDoc() - reader.numDocs()));
|
||||
+ (info.info.maxDoc() - numDocs));
|
||||
}
|
||||
} else {
|
||||
if (info.getDelCount() != 0) {
|
||||
|
@ -987,11 +1045,10 @@ public final class CheckIndex implements Closeable {
|
|||
"delete count mismatch: info="
|
||||
+ info.getDelCount()
|
||||
+ " vs reader="
|
||||
+ (info.info.maxDoc() - reader.numDocs()));
|
||||
+ (info.info.maxDoc() - numDocs));
|
||||
}
|
||||
}
|
||||
|
||||
if (checksumsOnly == false) {
|
||||
if (level >= Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS) {
|
||||
// Test Livedocs
|
||||
segInfoStat.liveDocStatus = testLiveDocs(reader, infoStream, failFast);
|
||||
|
||||
|
@ -1002,15 +1059,14 @@ public final class CheckIndex implements Closeable {
|
|||
segInfoStat.fieldNormStatus = testFieldNorms(reader, infoStream, failFast);
|
||||
|
||||
// Test the Term Index
|
||||
segInfoStat.termIndexStatus =
|
||||
testPostings(reader, infoStream, verbose, doSlowChecks, failFast);
|
||||
segInfoStat.termIndexStatus = testPostings(reader, infoStream, verbose, level, failFast);
|
||||
|
||||
// Test Stored Fields
|
||||
segInfoStat.storedFieldStatus = testStoredFields(reader, infoStream, failFast);
|
||||
|
||||
// Test Term Vectors
|
||||
segInfoStat.termVectorStatus =
|
||||
testTermVectors(reader, infoStream, verbose, doSlowChecks, failFast);
|
||||
testTermVectors(reader, infoStream, verbose, level, failFast);
|
||||
|
||||
// Test Docvalues
|
||||
segInfoStat.docValuesStatus = testDocValues(reader, infoStream, failFast);
|
||||
|
@ -1213,7 +1269,7 @@ public final class CheckIndex implements Closeable {
|
|||
if (liveDocs != null) {
|
||||
// it's ok for it to be non-null here, as long as none are set right?
|
||||
for (int j = 0; j < liveDocs.length(); j++) {
|
||||
if (!liveDocs.get(j)) {
|
||||
if (liveDocs.get(j) == false) {
|
||||
throw new CheckIndexException(
|
||||
"liveDocs mismatch: info says no deletions but doc " + j + " is deleted.");
|
||||
}
|
||||
|
@ -1341,7 +1397,7 @@ public final class CheckIndex implements Closeable {
|
|||
boolean isVectors,
|
||||
PrintStream infoStream,
|
||||
boolean verbose,
|
||||
boolean doSlowChecks)
|
||||
int level)
|
||||
throws IOException {
|
||||
// TODO: we should probably return our own stats thing...?!
|
||||
long startNS;
|
||||
|
@ -1450,7 +1506,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ hasFreqs);
|
||||
}
|
||||
|
||||
if (!isVectors) {
|
||||
if (isVectors == false) {
|
||||
final boolean expectedHasPositions =
|
||||
fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
|
||||
if (hasPositions != expectedHasPositions) {
|
||||
|
@ -1810,7 +1866,7 @@ public final class CheckIndex implements Closeable {
|
|||
// free-for-all before?
|
||||
// but for offsets in the postings lists these checks are fine: they were always
|
||||
// enforced by IndexWriter
|
||||
if (!isVectors) {
|
||||
if (isVectors == false) {
|
||||
if (startOffset < 0) {
|
||||
throw new CheckIndexException(
|
||||
"term "
|
||||
|
@ -1924,14 +1980,13 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
// Checking score blocks is heavy, we only do it on long postings lists, on every 1024th
|
||||
// term
|
||||
// or if slow checks are enabled.
|
||||
if (doSlowChecks
|
||||
// term or if slow checks are enabled.
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS
|
||||
|| docFreq > 1024
|
||||
|| (status.termCount + status.delTermCount) % 1024 == 0) {
|
||||
// First check max scores and block uptos
|
||||
// But only if slok checks are enabled since we visit all docs
|
||||
if (doSlowChecks) {
|
||||
// But only if slow checks are enabled since we visit all docs
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
|
||||
int max = -1;
|
||||
int maxFreq = 0;
|
||||
ImpactsEnum impactsEnum = termsEnum.impacts(PostingsEnum.FREQS);
|
||||
|
@ -1998,9 +2053,9 @@ public final class CheckIndex implements Closeable {
|
|||
Impacts impacts = impactsEnum.getImpacts();
|
||||
checkImpacts(impacts, doc);
|
||||
maxFreq = Integer.MAX_VALUE;
|
||||
for (int level = 0; level < impacts.numLevels(); ++level) {
|
||||
if (impacts.getDocIdUpTo(level) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(level);
|
||||
for (int impactsLevel = 0; impactsLevel < impacts.numLevels(); ++impactsLevel) {
|
||||
if (impacts.getDocIdUpTo(impactsLevel) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
|
||||
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
|
||||
break;
|
||||
}
|
||||
|
@ -2040,9 +2095,9 @@ public final class CheckIndex implements Closeable {
|
|||
Impacts impacts = impactsEnum.getImpacts();
|
||||
checkImpacts(impacts, doc);
|
||||
maxFreq = Integer.MAX_VALUE;
|
||||
for (int level = 0; level < impacts.numLevels(); ++level) {
|
||||
if (impacts.getDocIdUpTo(level) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(level);
|
||||
for (int impactsLevel = 0; impactsLevel < impacts.numLevels(); ++impactsLevel) {
|
||||
if (impacts.getDocIdUpTo(impactsLevel) >= max) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
|
||||
maxFreq = perLevelImpacts.get(perLevelImpacts.size() - 1).freq;
|
||||
break;
|
||||
}
|
||||
|
@ -2151,7 +2206,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ " doesn't have terms according to postings but has a norm value that is not zero: "
|
||||
+ Long.toUnsignedString(norm));
|
||||
}
|
||||
} else if (norm == 0 && visitedDocs.get(doc)) {
|
||||
} else if (visitedDocs.get(doc)) {
|
||||
throw new CheckIndexException(
|
||||
"Document "
|
||||
+ doc
|
||||
|
@ -2307,7 +2362,7 @@ public final class CheckIndex implements Closeable {
|
|||
static void checkImpacts(Impacts impacts, int lastTarget) {
|
||||
final int numLevels = impacts.numLevels();
|
||||
if (numLevels < 1) {
|
||||
throw new CheckIndexException("The number of levels must be >= 1, got " + numLevels);
|
||||
throw new CheckIndexException("The number of impact levels must be >= 1, got " + numLevels);
|
||||
}
|
||||
|
||||
int docIdUpTo0 = impacts.getDocIdUpTo(0);
|
||||
|
@ -2319,17 +2374,17 @@ public final class CheckIndex implements Closeable {
|
|||
+ lastTarget);
|
||||
}
|
||||
|
||||
for (int level = 1; level < numLevels; ++level) {
|
||||
int docIdUpTo = impacts.getDocIdUpTo(level);
|
||||
int previousDocIdUpTo = impacts.getDocIdUpTo(level - 1);
|
||||
for (int impactsLevel = 1; impactsLevel < numLevels; ++impactsLevel) {
|
||||
int docIdUpTo = impacts.getDocIdUpTo(impactsLevel);
|
||||
int previousDocIdUpTo = impacts.getDocIdUpTo(impactsLevel - 1);
|
||||
if (docIdUpTo < previousDocIdUpTo) {
|
||||
throw new CheckIndexException(
|
||||
"Decreasing return for getDocIdUpTo: level "
|
||||
+ (level - 1)
|
||||
+ (impactsLevel - 1)
|
||||
+ " returned "
|
||||
+ previousDocIdUpTo
|
||||
+ " but level "
|
||||
+ level
|
||||
+ impactsLevel
|
||||
+ " returned "
|
||||
+ docIdUpTo
|
||||
+ " for target "
|
||||
|
@ -2337,10 +2392,10 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
for (int level = 0; level < numLevels; ++level) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(level);
|
||||
for (int impactsLevel = 0; impactsLevel < numLevels; ++impactsLevel) {
|
||||
List<Impact> perLevelImpacts = impacts.getImpacts(impactsLevel);
|
||||
if (perLevelImpacts.isEmpty()) {
|
||||
throw new CheckIndexException("Got empty list of impacts on level " + level);
|
||||
throw new CheckIndexException("Got empty list of impacts on level " + impactsLevel);
|
||||
}
|
||||
Impact first = perLevelImpacts.get(0);
|
||||
if (first.freq < 1) {
|
||||
|
@ -2358,9 +2413,9 @@ public final class CheckIndex implements Closeable {
|
|||
"Impacts are not ordered or contain dups, got " + previous + " then " + impact);
|
||||
}
|
||||
}
|
||||
if (level > 0) {
|
||||
// Make sure that impacts at level N trigger better scores than an level N-1
|
||||
Iterator<Impact> previousIt = impacts.getImpacts(level - 1).iterator();
|
||||
if (impactsLevel > 0) {
|
||||
// Make sure that impacts at level N trigger better scores than an impactsLevel N-1
|
||||
Iterator<Impact> previousIt = impacts.getImpacts(impactsLevel - 1).iterator();
|
||||
previous = previousIt.next();
|
||||
Iterator<Impact> it = perLevelImpacts.iterator();
|
||||
Impact impact = it.next();
|
||||
|
@ -2376,9 +2431,9 @@ public final class CheckIndex implements Closeable {
|
|||
"Found impact "
|
||||
+ previous
|
||||
+ " on level "
|
||||
+ (level - 1)
|
||||
+ (impactsLevel - 1)
|
||||
+ " but no impact on level "
|
||||
+ level
|
||||
+ impactsLevel
|
||||
+ " triggers a better score: "
|
||||
+ perLevelImpacts);
|
||||
}
|
||||
|
@ -2395,7 +2450,7 @@ public final class CheckIndex implements Closeable {
|
|||
*/
|
||||
public static Status.TermIndexStatus testPostings(CodecReader reader, PrintStream infoStream)
|
||||
throws IOException {
|
||||
return testPostings(reader, infoStream, false, true, false);
|
||||
return testPostings(reader, infoStream, false, Level.MIN_LEVEL_FOR_SLOW_CHECKS, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2404,15 +2459,11 @@ public final class CheckIndex implements Closeable {
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermIndexStatus testPostings(
|
||||
CodecReader reader,
|
||||
PrintStream infoStream,
|
||||
boolean verbose,
|
||||
boolean doSlowChecks,
|
||||
boolean failFast)
|
||||
CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
|
||||
throws IOException {
|
||||
|
||||
// TODO: we should go and verify term vectors match, if
|
||||
// doSlowChecks is on...
|
||||
// TODO: we should go and verify term vectors match, if the Level is high enough to
|
||||
// include slow checks
|
||||
Status.TermIndexStatus status;
|
||||
final int maxDoc = reader.maxDoc();
|
||||
|
||||
|
@ -2443,7 +2494,7 @@ public final class CheckIndex implements Closeable {
|
|||
false,
|
||||
infoStream,
|
||||
verbose,
|
||||
doSlowChecks);
|
||||
level);
|
||||
} catch (Throwable e) {
|
||||
if (failFast) {
|
||||
throw IOUtils.rethrowAlways(e);
|
||||
|
@ -3132,7 +3183,7 @@ public final class CheckIndex implements Closeable {
|
|||
for (FieldInfo fieldInfo : reader.getFieldInfos()) {
|
||||
if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
|
||||
status.totalValueFields++;
|
||||
checkDocValues(fieldInfo, dvReader, reader.maxDoc(), infoStream, status);
|
||||
checkDocValues(fieldInfo, dvReader, status);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3162,11 +3213,11 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
@FunctionalInterface
|
||||
private static interface DocValuesIteratorSupplier {
|
||||
private interface DocValuesIteratorSupplier {
|
||||
DocValuesIterator get(FieldInfo fi) throws IOException;
|
||||
}
|
||||
|
||||
private static void checkDVIterator(FieldInfo fi, int maxDoc, DocValuesIteratorSupplier producer)
|
||||
private static void checkDVIterator(FieldInfo fi, DocValuesIteratorSupplier producer)
|
||||
throws IOException {
|
||||
String field = fi.name;
|
||||
|
||||
|
@ -3284,7 +3335,7 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
private static void checkBinaryDocValues(
|
||||
String fieldName, int maxDoc, BinaryDocValues bdv, BinaryDocValues bdv2) throws IOException {
|
||||
String fieldName, BinaryDocValues bdv, BinaryDocValues bdv2) throws IOException {
|
||||
if (bdv.docID() != -1) {
|
||||
throw new CheckIndexException(
|
||||
"binary dv iterator for field: "
|
||||
|
@ -3309,7 +3360,7 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
private static void checkSortedDocValues(
|
||||
String fieldName, int maxDoc, SortedDocValues dv, SortedDocValues dv2) throws IOException {
|
||||
String fieldName, SortedDocValues dv, SortedDocValues dv2) throws IOException {
|
||||
if (dv.docID() != -1) {
|
||||
throw new CheckIndexException(
|
||||
"sorted dv iterator for field: "
|
||||
|
@ -3373,8 +3424,7 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
private static void checkSortedSetDocValues(
|
||||
String fieldName, int maxDoc, SortedSetDocValues dv, SortedSetDocValues dv2)
|
||||
throws IOException {
|
||||
String fieldName, SortedSetDocValues dv, SortedSetDocValues dv2) throws IOException {
|
||||
final long maxOrd = dv.getValueCount() - 1;
|
||||
LongBitSet seenOrds = new LongBitSet(dv.getValueCount());
|
||||
long maxOrd2 = -1;
|
||||
|
@ -3470,7 +3520,7 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
private static void checkSortedNumericDocValues(
|
||||
String fieldName, int maxDoc, SortedNumericDocValues ndv, SortedNumericDocValues ndv2)
|
||||
String fieldName, SortedNumericDocValues ndv, SortedNumericDocValues ndv2)
|
||||
throws IOException {
|
||||
if (ndv.docID() != -1) {
|
||||
throw new CheckIndexException(
|
||||
|
@ -3539,38 +3589,32 @@ public final class CheckIndex implements Closeable {
|
|||
}
|
||||
|
||||
private static void checkDocValues(
|
||||
FieldInfo fi,
|
||||
DocValuesProducer dvReader,
|
||||
int maxDoc,
|
||||
PrintStream infoStream,
|
||||
DocValuesStatus status)
|
||||
throws Exception {
|
||||
FieldInfo fi, DocValuesProducer dvReader, DocValuesStatus status) throws Exception {
|
||||
switch (fi.getDocValuesType()) {
|
||||
case SORTED:
|
||||
status.totalSortedFields++;
|
||||
checkDVIterator(fi, maxDoc, dvReader::getSorted);
|
||||
checkSortedDocValues(fi.name, maxDoc, dvReader.getSorted(fi), dvReader.getSorted(fi));
|
||||
checkDVIterator(fi, dvReader::getSorted);
|
||||
checkSortedDocValues(fi.name, dvReader.getSorted(fi), dvReader.getSorted(fi));
|
||||
break;
|
||||
case SORTED_NUMERIC:
|
||||
status.totalSortedNumericFields++;
|
||||
checkDVIterator(fi, maxDoc, dvReader::getSortedNumeric);
|
||||
checkDVIterator(fi, dvReader::getSortedNumeric);
|
||||
checkSortedNumericDocValues(
|
||||
fi.name, maxDoc, dvReader.getSortedNumeric(fi), dvReader.getSortedNumeric(fi));
|
||||
fi.name, dvReader.getSortedNumeric(fi), dvReader.getSortedNumeric(fi));
|
||||
break;
|
||||
case SORTED_SET:
|
||||
status.totalSortedSetFields++;
|
||||
checkDVIterator(fi, maxDoc, dvReader::getSortedSet);
|
||||
checkSortedSetDocValues(
|
||||
fi.name, maxDoc, dvReader.getSortedSet(fi), dvReader.getSortedSet(fi));
|
||||
checkDVIterator(fi, dvReader::getSortedSet);
|
||||
checkSortedSetDocValues(fi.name, dvReader.getSortedSet(fi), dvReader.getSortedSet(fi));
|
||||
break;
|
||||
case BINARY:
|
||||
status.totalBinaryFields++;
|
||||
checkDVIterator(fi, maxDoc, dvReader::getBinary);
|
||||
checkBinaryDocValues(fi.name, maxDoc, dvReader.getBinary(fi), dvReader.getBinary(fi));
|
||||
checkDVIterator(fi, dvReader::getBinary);
|
||||
checkBinaryDocValues(fi.name, dvReader.getBinary(fi), dvReader.getBinary(fi));
|
||||
break;
|
||||
case NUMERIC:
|
||||
status.totalNumericFields++;
|
||||
checkDVIterator(fi, maxDoc, dvReader::getNumeric);
|
||||
checkDVIterator(fi, dvReader::getNumeric);
|
||||
checkNumericDocValues(fi.name, dvReader.getNumeric(fi), dvReader.getNumeric(fi));
|
||||
break;
|
||||
case NONE:
|
||||
|
@ -3586,7 +3630,7 @@ public final class CheckIndex implements Closeable {
|
|||
*/
|
||||
public static Status.TermVectorStatus testTermVectors(CodecReader reader, PrintStream infoStream)
|
||||
throws IOException {
|
||||
return testTermVectors(reader, infoStream, false, false, false);
|
||||
return testTermVectors(reader, infoStream, false, Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -3595,11 +3639,7 @@ public final class CheckIndex implements Closeable {
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public static Status.TermVectorStatus testTermVectors(
|
||||
CodecReader reader,
|
||||
PrintStream infoStream,
|
||||
boolean verbose,
|
||||
boolean doSlowChecks,
|
||||
boolean failFast)
|
||||
CodecReader reader, PrintStream infoStream, boolean verbose, int level, boolean failFast)
|
||||
throws IOException {
|
||||
long startNS = System.nanoTime();
|
||||
final Status.TermVectorStatus status = new Status.TermVectorStatus();
|
||||
|
@ -3612,14 +3652,14 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
PostingsEnum postings = null;
|
||||
|
||||
// Only used if doSlowChecks is true:
|
||||
// Only used if the Level is high enough to include slow checks:
|
||||
PostingsEnum postingsDocs = null;
|
||||
|
||||
final Bits liveDocs = reader.getLiveDocs();
|
||||
|
||||
FieldsProducer postingsFields;
|
||||
// TODO: testTermsIndex
|
||||
if (doSlowChecks) {
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
|
||||
postingsFields = reader.getPostingsReader();
|
||||
if (postingsFields != null) {
|
||||
postingsFields = postingsFields.getMergeInstance();
|
||||
|
@ -3643,8 +3683,7 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
if (tfv != null) {
|
||||
// First run with no deletions:
|
||||
checkFields(
|
||||
tfv, null, 1, fieldInfos, null, false, true, infoStream, verbose, doSlowChecks);
|
||||
checkFields(tfv, null, 1, fieldInfos, null, false, true, infoStream, verbose, level);
|
||||
|
||||
// Only agg stats if the doc is live:
|
||||
final boolean doStats = liveDocs == null || liveDocs.get(j);
|
||||
|
@ -3660,7 +3699,7 @@ public final class CheckIndex implements Closeable {
|
|||
|
||||
// Make sure FieldInfo thinks this field is vector'd:
|
||||
final FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||
if (!fieldInfo.hasVectors()) {
|
||||
if (fieldInfo.hasVectors() == false) {
|
||||
throw new CheckIndexException(
|
||||
"docID="
|
||||
+ j
|
||||
|
@ -3669,7 +3708,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ " but FieldInfo has storeTermVector=false");
|
||||
}
|
||||
|
||||
if (doSlowChecks) {
|
||||
if (level >= Level.MIN_LEVEL_FOR_SLOW_CHECKS) {
|
||||
Terms terms = tfv.terms(field);
|
||||
TermsEnum termsEnum = terms.iterator();
|
||||
final boolean postingsHasFreq =
|
||||
|
@ -3696,7 +3735,7 @@ public final class CheckIndex implements Closeable {
|
|||
postings = termsEnum.postings(postings, PostingsEnum.ALL);
|
||||
assert postings != null;
|
||||
|
||||
if (!postingsTermsEnum.seekExact(term)) {
|
||||
if (postingsTermsEnum.seekExact(term) == false) {
|
||||
throw new CheckIndexException(
|
||||
"vector term="
|
||||
+ term
|
||||
|
@ -3852,7 +3891,7 @@ public final class CheckIndex implements Closeable {
|
|||
+ " but postings does not.");
|
||||
}
|
||||
BytesRef postingsPayload = postingsDocs.getPayload();
|
||||
if (!payload.equals(postingsPayload)) {
|
||||
if (payload.equals(postingsPayload) == false) {
|
||||
throw new CheckIndexException(
|
||||
"vector term="
|
||||
+ term
|
||||
|
@ -3972,9 +4011,8 @@ public final class CheckIndex implements Closeable {
|
|||
/** Run-time configuration options for CheckIndex commands. */
|
||||
public static class Options {
|
||||
boolean doExorcise = false;
|
||||
boolean doSlowChecks = false;
|
||||
boolean verbose = false;
|
||||
boolean doChecksumsOnly = false;
|
||||
int level = Level.DEFAULT_VALUE;
|
||||
int threadCount;
|
||||
List<String> onlySegments = new ArrayList<>();
|
||||
String indexPath = null;
|
||||
|
@ -4011,9 +4049,10 @@ public final class CheckIndex implements Closeable {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (!assertsOn())
|
||||
if (assertsOn() == false) {
|
||||
System.out.println(
|
||||
"\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
|
||||
}
|
||||
|
||||
System.out.println("\nOpening index @ " + opts.indexPath + "\n");
|
||||
Directory directory = null;
|
||||
|
@@ -4037,6 +4076,42 @@ public final class CheckIndex implements Closeable {
 }
 }

+/** Class with static variables with information about CheckIndex's -level parameter. */
+public static class Level {
+private Level() {}
+
+/** Minimum valid level. */
+public static final int MIN_VALUE = 1;
+
+/** Maximum valid level. */
+public static final int MAX_VALUE = 3;
+
+/** The default level if none is specified. */
+public static final int DEFAULT_VALUE = MIN_VALUE;
+
+/** Minimum level required to run checksum checks. */
+public static final int MIN_LEVEL_FOR_CHECKSUM_CHECKS = 1;
+
+/** Minimum level required to run integrity checks. */
+public static final int MIN_LEVEL_FOR_INTEGRITY_CHECKS = 2;
+
+/** Minimum level required to run slow checks. */
+public static final int MIN_LEVEL_FOR_SLOW_CHECKS = 3;
+
+/** Checks if given level value is within the allowed bounds else it raises an Exception. */
+public static void checkIfLevelInBounds(int levelVal) throws IllegalArgumentException {
+if (levelVal < Level.MIN_VALUE || levelVal > Level.MAX_VALUE) {
+throw new IllegalArgumentException(
+String.format(
+Locale.ROOT,
+"ERROR: given value: '%d' for -level option is out of bounds. Please use a value from '%d'->'%d'",
+levelVal,
+Level.MIN_VALUE,
+Level.MAX_VALUE));
+}
+}
+}
+
 /**
 * Parse command line args into fields
 *
|
@@ -4051,15 +4126,29 @@ public final class CheckIndex implements Closeable {
 int i = 0;
 while (i < args.length) {
 String arg = args[i];
-if ("-fast".equals(arg)) {
-opts.doChecksumsOnly = true;
+if ("-level".equals(arg)) {
+if (i == args.length - 1) {
+throw new IllegalArgumentException("ERROR: missing value for -level option");
+}
+i++;
+int level = Integer.parseInt(args[i]);
+Level.checkIfLevelInBounds(level);
+opts.level = level;
+} else if ("-fast".equals(arg)) {
+// Deprecated. Remove in Lucene 11.
+System.err.println(
+"-fast is deprecated, use '-level 1' for explicitly verifying file checksums only. This is also now the default "
++ "behaviour!");
+} else if ("-slow".equals(arg)) {
+// Deprecated. Remove in Lucene 11.
+System.err.println("-slow is deprecated, use '-level 3' instead for slow checks");
+opts.level = Level.MIN_LEVEL_FOR_SLOW_CHECKS;
 } else if ("-exorcise".equals(arg)) {
 opts.doExorcise = true;
 } else if ("-crossCheckTermVectors".equals(arg)) {
-System.err.println("-crossCheckTermVectors is deprecated, use -slow instead");
-opts.doSlowChecks = true;
-} else if ("-slow".equals(arg)) {
-opts.doSlowChecks = true;
+// Deprecated. Remove in Lucene 11.
+System.err.println("-crossCheckTermVectors is deprecated, use '-level 3' instead");
+opts.level = Level.MAX_VALUE;
 } else if (arg.equals("-verbose")) {
 opts.verbose = true;
 } else if (arg.equals("-segment")) {
|
@@ -4096,11 +4185,13 @@ public final class CheckIndex implements Closeable {
 if (opts.indexPath == null) {
 throw new IllegalArgumentException(
 "\nERROR: index path not specified"
-+ "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-slow] [-segment X] [-segment Y] [-threadCount X] [-dir-impl X]\n"
++ "\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-exorcise] [-level X] [-segment X] [-segment Y] [-threadCount X] [-dir-impl X]\n"
 + "\n"
 + " -exorcise: actually write a new segments_N file, removing any problematic segments\n"
-+ " -fast: just verify file checksums, omitting logical integrity checks\n"
-+ " -slow: do additional slow checks; THIS IS VERY SLOW!\n"
++ " -level X: sets the detail level of the check. The higher the value, the more checks are done.\n"
++ " 1 - (Default) Checksum checks only.\n"
++ " 2 - All level 1 checks + logical integrity checks.\n"
++ " 3 - All level 2 checks + slow checks.\n"
 + " -codec X: when exorcising, codec to write the new segments_N file with\n"
 + " -verbose: print additional details\n"
 + " -segment X: only check the specified segments. This can be specified multiple\n"
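
The help text above maps the old flags onto the new levels: -fast becomes level 1 (now the default), logical integrity checking is level 2, and -slow becomes level 3. A rough sketch of driving the same levels programmatically is shown below; the setLevel call, the Level constants, and the Status.clean field are the ones visible in this patch, while the index path and the surrounding wiring are assumed.

// Hedged sketch only: running CheckIndex with the new -level semantics from Java.
// The CLI equivalent would be roughly:
//   java org.apache.lucene.index.CheckIndex /path/to/index -level 2
import java.nio.file.Paths;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CheckIndexLevelExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
        CheckIndex checker = new CheckIndex(dir)) {
      // Level 2 = checksum checks plus logical integrity checks; level 3 would add the slow checks.
      checker.setLevel(CheckIndex.Level.MIN_LEVEL_FOR_INTEGRITY_CHECKS);
      checker.setInfoStream(System.out, false);
      CheckIndex.Status status = checker.checkIndex();
      System.out.println(status.clean ? "Index is clean" : "Index has problems");
    }
  }
}
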
|
@ -4115,7 +4206,8 @@ public final class CheckIndex implements Closeable {
|
|||
+ "If no package is specified the "
|
||||
+ FSDirectory.class.getPackage().getName()
|
||||
+ " package will be used.\n"
|
||||
+ "\n"
|
||||
+ "CheckIndex only verifies file checksums as default.\n"
|
||||
+ "Use -level with value of '2' or higher if you also want to check segment file contents.\n\n"
|
||||
+ "**WARNING**: -exorcise *LOSES DATA*. This should only be used on an emergency basis as it will cause\n"
|
||||
+ "documents (perhaps many) to be permanently removed from the index. Always make\n"
|
||||
+ "a backup copy of your index before running this! Do not run this tool on an index\n"
|
||||
|
@ -4137,10 +4229,6 @@ public final class CheckIndex implements Closeable {
|
|||
throw new IllegalArgumentException("ERROR: cannot specify both -exorcise and -segment");
|
||||
}
|
||||
|
||||
if (opts.doChecksumsOnly && opts.doSlowChecks) {
|
||||
throw new IllegalArgumentException("ERROR: cannot specify both -fast and -slow");
|
||||
}
|
||||
|
||||
return opts;
|
||||
}
|
||||
|
||||
|
@ -4151,8 +4239,7 @@ public final class CheckIndex implements Closeable {
|
|||
* @return 0 iff the index is clean, 1 otherwise
|
||||
*/
|
||||
public int doCheck(Options opts) throws IOException, InterruptedException {
|
||||
setDoSlowChecks(opts.doSlowChecks);
|
||||
setChecksumsOnly(opts.doChecksumsOnly);
|
||||
setLevel(opts.level);
|
||||
setInfoStream(opts.out, opts.verbose);
|
||||
// user provided thread count via command line argument, overriding the default with user
|
||||
// provided value
|
||||
|
@ -4166,8 +4253,8 @@ public final class CheckIndex implements Closeable {
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (!result.clean) {
|
||||
if (!opts.doExorcise) {
|
||||
if (result.clean == false) {
|
||||
if (opts.doExorcise == false) {
|
||||
opts.out.println(
|
||||
"WARNING: would write new segments file, and "
|
||||
+ result.totLoseDocCount
|
||||
|
|
|
@ -270,7 +270,6 @@ final class FieldUpdatesBuffer {
|
|||
static class BufferedUpdate {
|
||||
|
||||
private BufferedUpdate() {}
|
||||
;
|
||||
|
||||
/** the max document ID this update should be applied to */
|
||||
int docUpTo;
|
||||
|
|
|
@ -33,6 +33,7 @@ import java.util.HashSet;
|
|||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Queue;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
@ -55,6 +56,8 @@ import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate;
|
|||
import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate;
|
||||
import org.apache.lucene.index.FieldInfos.FieldNumbers;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.MergePolicy.MergeReader;
|
||||
import org.apache.lucene.index.Sorter.DocMap;
|
||||
import org.apache.lucene.internal.tests.IndexPackageAccess;
|
||||
import org.apache.lucene.internal.tests.IndexWriterAccess;
|
||||
import org.apache.lucene.internal.tests.TestSecrets;
|
||||
|
@ -3413,8 +3416,20 @@ public class IndexWriter
|
|||
Collections.emptyMap(),
|
||||
config.getIndexSort());
|
||||
|
||||
List<CodecReader> readers =
|
||||
merge.getMergeReader().stream().map(r -> r.codecReader).collect(Collectors.toList());
|
||||
List<CodecReader> readers = new ArrayList<>();
|
||||
for (MergeReader mr : merge.getMergeReader()) {
|
||||
CodecReader reader = merge.wrapForMerge(mr.codecReader);
|
||||
readers.add(reader);
|
||||
}
|
||||
|
||||
if (config.getIndexSort() == null && readers.isEmpty() == false) {
|
||||
CodecReader mergedReader = SlowCompositeCodecReaderWrapper.wrap(readers);
|
||||
DocMap docMap = merge.reorder(mergedReader, directory);
|
||||
if (docMap != null) {
|
||||
readers = Collections.singletonList(SortingCodecReader.wrap(mergedReader, docMap, null));
|
||||
}
|
||||
}
|
||||
|
||||
SegmentMerger merger =
|
||||
new SegmentMerger(readers, segInfo, infoStream, trackingDir, globalFieldNumberMap, context);
|
||||
|
||||
|
@ -3464,6 +3479,8 @@ public class IndexWriter
|
|||
merge.getMergeInfo().info.setUseCompoundFile(true);
|
||||
}
|
||||
|
||||
merge.setMergeInfo(merge.info);
|
||||
|
||||
// Have codec write SegmentInfo. Must do this after
|
||||
// creating CFS so that 1) .si isn't slurped into CFS,
|
||||
// and 2) .si reflects useCompoundFile=true change
|
||||
|
@ -3791,7 +3808,7 @@ public class IndexWriter
|
|||
new OneMergeWrappingMergePolicy(
|
||||
config.getMergePolicy(),
|
||||
toWrap ->
|
||||
new MergePolicy.OneMerge(toWrap.segments) {
|
||||
new MergePolicy.OneMerge(toWrap) {
|
||||
SegmentCommitInfo origInfo;
|
||||
final AtomicBoolean onlyOnce = new AtomicBoolean(false);
|
||||
|
||||
|
@ -3890,6 +3907,18 @@ public class IndexWriter
|
|||
public CodecReader wrapForMerge(CodecReader reader) throws IOException {
|
||||
return toWrap.wrapForMerge(reader); // must delegate
|
||||
}
|
||||
|
||||
@Override
|
||||
public Sorter.DocMap reorder(CodecReader reader, Directory dir)
|
||||
throws IOException {
|
||||
return toWrap.reorder(reader, dir); // must delegate
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setMergeInfo(SegmentCommitInfo info) {
|
||||
super.setMergeInfo(info);
|
||||
toWrap.setMergeInfo(info);
|
||||
}
|
||||
}),
|
||||
trigger,
|
||||
UNBOUNDED_MAX_MERGE_SEGMENTS);
|
||||
|
@ -4312,7 +4341,7 @@ public class IndexWriter
|
|||
* merge.info). If no deletes were flushed, no new deletes file is saved.
|
||||
*/
|
||||
private synchronized ReadersAndUpdates commitMergedDeletesAndUpdates(
|
||||
MergePolicy.OneMerge merge, MergeState mergeState) throws IOException {
|
||||
MergePolicy.OneMerge merge, MergeState.DocMap[] docMaps) throws IOException {
|
||||
|
||||
mergeFinishedGen.incrementAndGet();
|
||||
|
||||
|
@ -4336,7 +4365,7 @@ public class IndexWriter
|
|||
|
||||
boolean anyDVUpdates = false;
|
||||
|
||||
assert sourceSegments.size() == mergeState.docMaps.length;
|
||||
assert sourceSegments.size() == docMaps.length;
|
||||
for (int i = 0; i < sourceSegments.size(); i++) {
|
||||
SegmentCommitInfo info = sourceSegments.get(i);
|
||||
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
|
||||
|
@ -4346,12 +4375,11 @@ public class IndexWriter
|
|||
// the pool:
|
||||
assert rld != null : "seg=" + info.info.name;
|
||||
|
||||
MergeState.DocMap segDocMap = mergeState.docMaps[i];
|
||||
MergeState.DocMap segDocMap = docMaps[i];
|
||||
|
||||
carryOverHardDeletes(
|
||||
mergedDeletesAndUpdates,
|
||||
maxDoc,
|
||||
mergeState.liveDocs[i],
|
||||
merge.getMergeReader().get(i).hardLiveDocs,
|
||||
rld.getHardLiveDocs(),
|
||||
segDocMap);
|
||||
|
@ -4454,26 +4482,21 @@ public class IndexWriter
|
|||
private static void carryOverHardDeletes(
|
||||
ReadersAndUpdates mergedReadersAndUpdates,
|
||||
int maxDoc,
|
||||
Bits mergeLiveDocs, // the liveDocs used to build the segDocMaps
|
||||
Bits prevHardLiveDocs, // the hard deletes when the merge reader was pulled
|
||||
Bits currentHardLiveDocs, // the current hard deletes
|
||||
MergeState.DocMap segDocMap)
|
||||
throws IOException {
|
||||
|
||||
assert mergeLiveDocs == null || mergeLiveDocs.length() == maxDoc;
|
||||
// if we mix soft and hard deletes we need to make sure that we only carry over deletes
|
||||
// that were not deleted before. Otherwise the segDocMap doesn't contain a mapping.
|
||||
// yet this is also required if any MergePolicy modifies the liveDocs since this is
|
||||
// what the segDocMap is build on.
|
||||
final IntPredicate carryOverDelete =
|
||||
mergeLiveDocs == null || mergeLiveDocs == prevHardLiveDocs
|
||||
? docId -> currentHardLiveDocs.get(docId) == false
|
||||
: docId -> mergeLiveDocs.get(docId) && currentHardLiveDocs.get(docId) == false;
|
||||
docId -> segDocMap.get(docId) != -1 && currentHardLiveDocs.get(docId) == false;
|
||||
if (prevHardLiveDocs != null) {
|
||||
// If we had deletions on starting the merge we must
|
||||
// still have deletions now:
|
||||
assert currentHardLiveDocs != null;
|
||||
assert mergeLiveDocs != null;
|
||||
assert prevHardLiveDocs.length() == maxDoc;
|
||||
assert currentHardLiveDocs.length() == maxDoc;
|
||||
|
||||
|
@ -4516,7 +4539,7 @@ public class IndexWriter
|
|||
}
|
||||
|
||||
@SuppressWarnings("try")
|
||||
private synchronized boolean commitMerge(MergePolicy.OneMerge merge, MergeState mergeState)
|
||||
private synchronized boolean commitMerge(MergePolicy.OneMerge merge, MergeState.DocMap[] docMaps)
|
||||
throws IOException {
|
||||
merge.onMergeComplete();
|
||||
testPoint("startCommitMerge");
|
||||
|
@ -4559,7 +4582,7 @@ public class IndexWriter
|
|||
}
|
||||
|
||||
final ReadersAndUpdates mergedUpdates =
|
||||
merge.info.info.maxDoc() == 0 ? null : commitMergedDeletesAndUpdates(merge, mergeState);
|
||||
merge.info.info.maxDoc() == 0 ? null : commitMergedDeletesAndUpdates(merge, docMaps);
|
||||
|
||||
// If the doc store we are using has been closed and
|
||||
// is in now compound format (but wasn't when we
|
||||
|
@ -5163,12 +5186,57 @@ public class IndexWriter
|
|||
}
|
||||
mergeReaders.add(wrappedReader);
|
||||
}
|
||||
|
||||
MergeState.DocMap[] reorderDocMaps = null;
|
||||
if (config.getIndexSort() == null) {
|
||||
// Create a merged view of the input segments. This effectively does the merge.
|
||||
CodecReader mergedView = SlowCompositeCodecReaderWrapper.wrap(mergeReaders);
|
||||
Sorter.DocMap docMap = merge.reorder(mergedView, directory);
|
||||
if (docMap != null) {
|
||||
reorderDocMaps = new MergeState.DocMap[mergeReaders.size()];
|
||||
int docBase = 0;
|
||||
int i = 0;
|
||||
for (CodecReader reader : mergeReaders) {
|
||||
final int currentDocBase = docBase;
|
||||
reorderDocMaps[i] =
|
||||
docID -> {
|
||||
Objects.checkIndex(docID, reader.maxDoc());
|
||||
return docMap.oldToNew(currentDocBase + docID);
|
||||
};
|
||||
i++;
|
||||
docBase += reader.maxDoc();
|
||||
}
|
||||
// This makes merging more expensive as it disables some bulk merging optimizations, so
|
||||
// only do this if a non-null DocMap is returned.
|
||||
mergeReaders =
|
||||
Collections.singletonList(SortingCodecReader.wrap(mergedView, docMap, null));
|
||||
}
|
||||
}
|
||||
|
||||
final SegmentMerger merger =
|
||||
new SegmentMerger(
|
||||
mergeReaders, merge.info.info, infoStream, dirWrapper, globalFieldNumberMap, context);
|
||||
merge.info.setSoftDelCount(Math.toIntExact(softDeleteCount.get()));
|
||||
merge.checkAborted();
|
||||
|
||||
MergeState mergeState = merger.mergeState;
|
||||
MergeState.DocMap[] docMaps;
|
||||
if (reorderDocMaps == null) {
|
||||
docMaps = mergeState.docMaps;
|
||||
} else {
|
||||
// Since the reader was reordered, we passed a merged view to MergeState and from its
|
||||
// perspective there is a single input segment to the merge and the
|
||||
// SlowCompositeCodecReaderWrapper is effectively doing the merge.
|
||||
assert mergeState.docMaps.length == 1
|
||||
: "Got " + mergeState.docMaps.length + " docMaps, but expected 1";
|
||||
MergeState.DocMap compactionDocMap = mergeState.docMaps[0];
|
||||
docMaps = new MergeState.DocMap[reorderDocMaps.length];
|
||||
for (int i = 0; i < docMaps.length; ++i) {
|
||||
MergeState.DocMap reorderDocMap = reorderDocMaps[i];
|
||||
docMaps[i] = docID -> compactionDocMap.get(reorderDocMap.get(docID));
|
||||
}
|
||||
}
|
||||
|
||||
merge.mergeStartNS = System.nanoTime();
|
||||
|
||||
// This is where all the work happens:
|
||||
|
@ -5176,7 +5244,6 @@ public class IndexWriter
|
|||
merger.merge();
|
||||
}
|
||||
|
||||
MergeState mergeState = merger.mergeState;
|
||||
assert mergeState.segmentInfo == merge.info.info;
|
||||
merge.info.info.setFiles(new HashSet<>(dirWrapper.getCreatedFiles()));
|
||||
Codec codec = config.getCodec();
|
||||
|
@ -5229,7 +5296,7 @@ public class IndexWriter
|
|||
// Merge would produce a 0-doc segment, so we do nothing except commit the merge to remove
|
||||
// all the 0-doc segments that we "merged":
|
||||
assert merge.info.info.maxDoc() == 0;
|
||||
success = commitMerge(merge, mergeState);
|
||||
success = commitMerge(merge, docMaps);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -5309,6 +5376,8 @@ public class IndexWriter
|
|||
success = false;
|
||||
}
|
||||
|
||||
merge.setMergeInfo(merge.info);
|
||||
|
||||
// Have codec write SegmentInfo. Must do this after
|
||||
// creating CFS so that 1) .si isn't slurped into CFS,
|
||||
// and 2) .si reflects useCompoundFile=true change
|
||||
|
@ -5352,7 +5421,7 @@ public class IndexWriter
|
|||
}
|
||||
}
|
||||
|
||||
if (!commitMerge(merge, mergeState)) {
|
||||
if (!commitMerge(merge, docMaps)) {
|
||||
// commitMerge will return false if this merge was
|
||||
// aborted
|
||||
return 0;
|
||||
|
|
|
@@ -255,6 +255,15 @@ public abstract class MergePolicy {
 usesPooledReaders = false;
 }

+/** Constructor for wrapping. */
+protected OneMerge(OneMerge oneMerge) {
+this.segments = oneMerge.segments;
+this.mergeReaders = oneMerge.mergeReaders;
+this.totalMaxDoc = oneMerge.totalMaxDoc;
+this.mergeProgress = new OneMergeProgress();
+this.usesPooledReaders = oneMerge.usesPooledReaders;
+}
+
 /**
 * Called by {@link IndexWriter} after the merge started and from the thread that will be
 * executing the merge.
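
The new copy constructor exists so that a OneMerge can be wrapped while sharing the same segments and merge readers; the OneMergeWrappingMergePolicy usage in the IndexWriter hunks earlier in this diff relies on it. A hypothetical wrapper along those lines is sketched below; the class name and the logging line are invented, and the delegation pattern mirrors what the patch itself does.

// Hypothetical sketch built on the new protected OneMerge(OneMerge) copy constructor.
import java.io.IOException;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentCommitInfo;

final class DelegatingOneMerge extends MergePolicy.OneMerge {
  private final MergePolicy.OneMerge toWrap;

  DelegatingOneMerge(MergePolicy.OneMerge toWrap) {
    super(toWrap); // copies segments, mergeReaders, totalMaxDoc and usesPooledReaders; fresh progress
    this.toWrap = toWrap;
  }

  @Override
  public CodecReader wrapForMerge(CodecReader reader) throws IOException {
    System.out.println("merging " + reader.maxDoc() + " docs from one source");
    return toWrap.wrapForMerge(reader); // must delegate, as the patch notes
  }

  @Override
  public void setMergeInfo(SegmentCommitInfo info) {
    super.setMergeInfo(info);
    toWrap.setMergeInfo(info); // keep the wrapped merge informed as well
  }
}
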
|
@@ -288,11 +297,32 @@ public abstract class MergePolicy {
 }
 }

-/** Wrap the reader in order to add/remove information to the merged segment. */
+/**
+* Wrap a reader prior to merging in order to add/remove fields or documents.
+*
+* <p><b>NOTE:</b> It is illegal to reorder doc IDs here, use {@link
+* #reorder(CodecReader,Directory)} instead.
+*/
 public CodecReader wrapForMerge(CodecReader reader) throws IOException {
 return reader;
 }

+/**
+* Extend this method if you wish to renumber doc IDs. This method will be called when index
+* sorting is disabled on a merged view of the {@link OneMerge}. A {@code null} return value
+* indicates that doc IDs should not be reordered.
+*
+* <p><b>NOTE:</b> Returning a non-null value here disables several optimizations and increases
+* the merging overhead.
+*
+* @param reader The reader to reorder.
+* @param dir The {@link Directory} of the index, which may be used to create temporary files.
+* @lucene.experimental
+*/
+public Sorter.DocMap reorder(CodecReader reader, Directory dir) throws IOException {
+return null;
+}
+
 /**
 * Expert: Sets the {@link SegmentCommitInfo} of the merged segment. Allows sub-classes to e.g.
 * {@link SegmentInfo#addDiagnostics(Map) add diagnostic} properties.
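
reorder is the new extension point for renumbering doc IDs at merge time when no index sort is configured. As a toy illustration only, and assuming Sorter.DocMap (with size/oldToNew/newToOld) can be subclassed from a MergePolicy outside org.apache.lucene.index, an override that reverses the doc order of each merged segment might look like this; none of it is code from the patch.

// Toy sketch: reverse the doc order of every merged segment.
// Assumption: Sorter.DocMap exposes size()/oldToNew()/newToOld() and is accessible here.
import java.io.IOException;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.store.Directory;

final class ReversingOneMerge extends MergePolicy.OneMerge {
  ReversingOneMerge(MergePolicy.OneMerge toWrap) {
    super(toWrap);
  }

  @Override
  public Sorter.DocMap reorder(CodecReader reader, Directory dir) throws IOException {
    final int maxDoc = reader.maxDoc();
    return new Sorter.DocMap() {
      @Override
      public int size() {
        return maxDoc;
      }

      @Override
      public int oldToNew(int docID) {
        return maxDoc - 1 - docID; // reverse mapping
      }

      @Override
      public int newToOld(int docID) {
        return maxDoc - 1 - docID; // reversing is its own inverse
      }
    };
  }
}
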
|
@ -355,11 +385,7 @@ public abstract class MergePolicy {
|
|||
* not indicate the number of documents after the merge.
|
||||
*/
|
||||
public int totalNumDocs() {
|
||||
int total = 0;
|
||||
for (SegmentCommitInfo info : segments) {
|
||||
total += info.info.maxDoc();
|
||||
}
|
||||
return total;
|
||||
return totalMaxDoc;
|
||||
}
|
||||
|
||||
/** Return {@link MergeInfo} describing this merge. */
|
||||
|
|
|
@ -177,16 +177,13 @@ public class MergeState {
|
|||
|
||||
final int docBase = totalDocs;
|
||||
docMaps[i] =
|
||||
new DocMap() {
|
||||
@Override
|
||||
public int get(int docID) {
|
||||
if (liveDocs == null) {
|
||||
return docBase + docID;
|
||||
} else if (liveDocs.get(docID)) {
|
||||
return docBase + (int) delDocMap.get(docID);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
docID -> {
|
||||
if (liveDocs == null) {
|
||||
return docBase + docID;
|
||||
} else if (liveDocs.get(docID)) {
|
||||
return docBase + (int) delDocMap.get(docID);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
totalDocs += reader.numDocs();
|
||||
|
@@ -242,13 +239,10 @@ public class MergeState {
 }

 /** A map of doc IDs. */
-public abstract static class DocMap {
-/** Sole constructor. (For invocation by subclass constructors, typically implicit.) */
-// Explicitly declared so that we have non-empty javadoc
-protected DocMap() {}
-
+@FunctionalInterface
+public interface DocMap {
 /** Return the mapped docID or -1 if the given doc is not mapped. */
-public abstract int get(int docID);
+int get(int docID);
 }

 static PackedLongValues removeDeletes(final int maxDoc, final Bits liveDocs) {
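
Because DocMap is now a @FunctionalInterface with a single get method, doc ID maps can be written and composed as lambdas, much like the IndexWriter hunk earlier in this diff chains a reorder map with the compaction map. The two maps below are invented for illustration; the -1 convention for unmapped documents follows the javadoc above.

// Illustrative only: build and compose MergeState.DocMap instances with lambdas.
import org.apache.lucene.index.MergeState;

public class DocMapComposition {
  public static void main(String[] args) {
    int docBase = 100;
    MergeState.DocMap shiftByBase = docID -> docBase + docID; // e.g. a per-segment base offset
    MergeState.DocMap dropOdd = docID -> (docID % 2 == 0) ? docID / 2 : -1; // -1 means "not mapped"

    // Composition must propagate the "not mapped" marker instead of remapping -1.
    MergeState.DocMap composed =
        docID -> {
          int mapped = dropOdd.get(docID);
          return mapped == -1 ? -1 : shiftByBase.get(mapped);
        };

    System.out.println(composed.get(4)); // 102
    System.out.println(composed.get(5)); // -1
  }
}
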
|
|
|
@ -122,14 +122,11 @@ final class MultiSorter {
|
|||
final PackedLongValues remapped = builders[i].build();
|
||||
final Bits liveDocs = readers.get(i).getLiveDocs();
|
||||
docMaps[i] =
|
||||
new MergeState.DocMap() {
|
||||
@Override
|
||||
public int get(int docID) {
|
||||
if (liveDocs == null || liveDocs.get(docID)) {
|
||||
return (int) remapped.get(docID);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
docID -> {
|
||||
if (liveDocs == null || liveDocs.get(docID)) {
|
||||
return (int) remapped.get(docID);
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -325,7 +325,6 @@ public abstract class PointValues {
|
|||
|
||||
/** Notifies the caller that this many documents are about to be visited */
|
||||
default void grow(int count) {}
|
||||
;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -526,7 +526,6 @@ final class ReadersAndUpdates {
|
|||
return docIDOut;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
private synchronized Set<String> writeFieldInfosGen(
|
||||
FieldInfos fieldInfos, Directory dir, FieldInfosFormat infosFormat) throws IOException {
|
||||
|
|
|
@ -122,7 +122,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
static final int VERSION_CURRENT = VERSION_86;
|
||||
|
||||
/** Name of the generation reference file name */
|
||||
private static final String OLD_SEGMENTS_GEN = "segments.gen";
|
||||
static final String OLD_SEGMENTS_GEN = "segments.gen";
|
||||
|
||||
/** Used to name new segments. */
|
||||
public long counter;
|
||||
|
@ -146,7 +146,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
*
|
||||
* @see #setInfoStream
|
||||
*/
|
||||
private static PrintStream infoStream = null;
|
||||
private static PrintStream infoStream;
|
||||
|
||||
/** Id for this commit; only written starting with Lucene 5.0 */
|
||||
private byte[] id;
|
||||
|
@ -1010,6 +1010,7 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
|
|||
void replace(SegmentInfos other) {
|
||||
rollbackSegmentInfos(other.asList());
|
||||
lastGeneration = other.lastGeneration;
|
||||
userData = other.userData;
|
||||
}
|
||||
|
||||
/** Returns sum of all segment's maxDocs. Note that this does not include deletions */
|
||||
|
|
File diff suppressed because it is too large
|
@ -24,6 +24,7 @@ import java.util.Arrays;
|
|||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.codecs.DocValuesProducer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.KnnVectorsReader;
|
||||
|
@ -77,7 +78,7 @@ public final class SortingCodecReader extends FilterCodecReader {
|
|||
private final Sorter.DocMap docMap;
|
||||
|
||||
SortingPointValues(final PointValues in, Sorter.DocMap docMap) {
|
||||
this.in = in;
|
||||
this.in = Objects.requireNonNull(in);
|
||||
this.docMap = docMap;
|
||||
}
|
||||
|
||||
|
@ -472,6 +473,10 @@ public final class SortingCodecReader extends FilterCodecReader {
|
|||
|
||||
@Override
|
||||
public PointValues getValues(String field) throws IOException {
|
||||
var values = delegate.getValues(field);
|
||||
if (values == null) {
|
||||
return null;
|
||||
}
|
||||
return new SortingPointValues(delegate.getValues(field), docMap);
|
||||
}
|
||||
|
||||
|
|
|
@ -85,7 +85,11 @@ public final class IndexOrDocValuesQuery extends Query {
|
|||
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
return indexQuery.toString(field);
|
||||
return "IndexOrDocValuesQuery(indexQuery="
|
||||
+ indexQuery.toString(field)
|
||||
+ ", dvQuery="
|
||||
+ dvQuery.toString(field)
|
||||
+ ")";
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
@@ -62,9 +61,9 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton;
 * match lots of documents, counting the number of hits may take much longer than computing the top
 * hits so this trade-off allows to get some minimal information about the hit count without slowing
 * down search too much. The {@link TopDocs#scoreDocs} array is always accurate however. If this
-* behavior doesn't suit your needs, you should create collectors manually with either {@link
-* TopScoreDocCollector#create} or {@link TopFieldCollector#create} and call {@link #search(Query,
-* Collector)}.
+* behavior doesn't suit your needs, you should create collectorManagers manually with either {@link
+* TopScoreDocCollectorManager} or {@link TopFieldCollectorManager} and call {@link #search(Query,
+* CollectorManager)}.
 *
 * <p><a id="thread-safety"></a>
 *
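
For callers, the updated javadoc means building a CollectorManager instead of a Collector. A sketch of that path is below; the four TopScoreDocCollectorManager constructor arguments follow the call added later in this diff, while the index path, field name, and query are placeholders.

// Sketch of the CollectorManager-based search path the updated javadoc points to.
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TopScoreDocCollectorManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CollectorManagerSearch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
        DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // numHits, after, totalHitsThreshold, supportsConcurrency (as used later in this diff)
      CollectorManager<TopScoreDocCollector, TopDocs> manager =
          new TopScoreDocCollectorManager(10, null, 1000, searcher.getSlices().length > 1);
      TopDocs hits = searcher.search(new TermQuery(new Term("body", "lucene")), manager);
      System.out.println("total hits: " + hits.totalHits);
    }
  }
}
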
|
@ -455,35 +454,10 @@ public class IndexSearcher {
|
|||
}
|
||||
|
||||
final int cappedNumHits = Math.min(numHits, limit);
|
||||
|
||||
final LeafSlice[] leafSlices = getSlices();
|
||||
final CollectorManager<TopScoreDocCollector, TopDocs> manager =
|
||||
new CollectorManager<TopScoreDocCollector, TopDocs>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker =
|
||||
leafSlices.length <= 1
|
||||
? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits))
|
||||
: HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
|
||||
|
||||
private final MaxScoreAccumulator minScoreAcc =
|
||||
leafSlices.length <= 1 ? null : new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
public TopScoreDocCollector newCollector() throws IOException {
|
||||
return TopScoreDocCollector.create(
|
||||
cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TopDocs reduce(Collection<TopScoreDocCollector> collectors) throws IOException {
|
||||
final TopDocs[] topDocs = new TopDocs[collectors.size()];
|
||||
int i = 0;
|
||||
for (TopScoreDocCollector collector : collectors) {
|
||||
topDocs[i++] = collector.topDocs();
|
||||
}
|
||||
return TopDocs.merge(0, cappedNumHits, topDocs);
|
||||
}
|
||||
};
|
||||
final boolean supportsConcurrency = getSlices().length > 1;
|
||||
CollectorManager<TopScoreDocCollector, TopDocs> manager =
|
||||
new TopScoreDocCollectorManager(
|
||||
cappedNumHits, after, TOTAL_HITS_THRESHOLD, supportsConcurrency);
|
||||
|
||||
return search(query, manager);
|
||||
}
|
||||
|
@ -510,7 +484,10 @@ public class IndexSearcher {
|
|||
*
|
||||
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
|
||||
* clauses.
|
||||
* @deprecated This method is being deprecated in favor of {@link IndexSearcher#search(Query,
|
||||
* CollectorManager)} due to its support for concurrency in IndexSearcher
|
||||
*/
|
||||
@Deprecated
|
||||
public void search(Query query, Collector results) throws IOException {
|
||||
query = rewrite(query, results.scoreMode().needsScores());
|
||||
search(leafContexts, createWeight(query, results.scoreMode(), 1), results);
|
||||
|
@ -602,34 +579,10 @@ public class IndexSearcher {
|
|||
final Sort rewrittenSort = sort.rewrite(this);
|
||||
final LeafSlice[] leafSlices = getSlices();
|
||||
|
||||
final boolean supportsConcurrency = leafSlices.length > 1;
|
||||
final CollectorManager<TopFieldCollector, TopFieldDocs> manager =
|
||||
new CollectorManager<>() {
|
||||
|
||||
private final HitsThresholdChecker hitsThresholdChecker =
|
||||
leafSlices.length <= 1
|
||||
? HitsThresholdChecker.create(Math.max(TOTAL_HITS_THRESHOLD, numHits))
|
||||
: HitsThresholdChecker.createShared(Math.max(TOTAL_HITS_THRESHOLD, numHits));
|
||||
|
||||
private final MaxScoreAccumulator minScoreAcc =
|
||||
leafSlices.length <= 1 ? null : new MaxScoreAccumulator();
|
||||
|
||||
@Override
|
||||
public TopFieldCollector newCollector() throws IOException {
|
||||
// TODO: don't pay the price for accurate hit counts by default
|
||||
return TopFieldCollector.create(
|
||||
rewrittenSort, cappedNumHits, after, hitsThresholdChecker, minScoreAcc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TopFieldDocs reduce(Collection<TopFieldCollector> collectors) throws IOException {
|
||||
final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
|
||||
int i = 0;
|
||||
for (TopFieldCollector collector : collectors) {
|
||||
topDocs[i++] = collector.topDocs();
|
||||
}
|
||||
return TopDocs.merge(rewrittenSort, 0, cappedNumHits, topDocs);
|
||||
}
|
||||
};
|
||||
new TopFieldCollectorManager(
|
||||
rewrittenSort, cappedNumHits, after, TOTAL_HITS_THRESHOLD, supportsConcurrency);
|
||||
|
||||
TopFieldDocs topDocs = search(query, manager);
|
||||
if (doDocScores) {
|
||||
|
|
|
@ -69,7 +69,6 @@ public abstract class PointInSetQuery extends Query implements Accountable {
|
|||
@Override
|
||||
public abstract BytesRef next();
|
||||
}
|
||||
;
|
||||
|
||||
/** The {@code packedPoints} iterator must be in sorted order. */
|
||||
protected PointInSetQuery(String field, int numDims, int bytesPerDim, Stream packedPoints) {
|
||||
|
|
Some files were not shown because too many files have changed in this diff.