LUCENE-9254: UniformSplit supports FST off-heap.

Closes #1301
Bruno Roustant 2020-02-27 12:56:58 +01:00
parent 7b9f212907
commit c7cf9e8e4f
16 changed files with 156 additions and 88 deletions


@@ -118,7 +118,8 @@ Improvements
Optimizations
---------------------
(No changes)
* LUCENE-9254: UniformSplit keeps FST off-heap. (Bruno Roustant)
Bug Fixes
---------------------


@@ -30,6 +30,7 @@ import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.OffHeapFSTStore;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
@@ -56,24 +57,24 @@ public class FSTDictionary implements IndexDictionary {
private static final long BASE_RAM_USAGE = RamUsageEstimator.shallowSizeOfInstance(FSTDictionary.class);
protected final FST<Long> dictionary;
protected final FST<Long> fst;
protected FSTDictionary(FST<Long> dictionary) {
this.dictionary = dictionary;
protected FSTDictionary(FST<Long> fst) {
this.fst = fst;
}
@Override
public long ramBytesUsed() {
return BASE_RAM_USAGE + dictionary.ramBytesUsed();
return BASE_RAM_USAGE + fst.ramBytesUsed();
}
@Override
public void write(DataOutput output, BlockEncoder blockEncoder) throws IOException {
if (blockEncoder == null) {
dictionary.save(output);
fst.save(output);
} else {
ByteBuffersDataOutput bytesDataOutput = ByteBuffersDataOutput.newResettableInstance();
dictionary.save(bytesDataOutput);
fst.save(bytesDataOutput);
BlockEncoder.WritableBytes encodedBytes = blockEncoder.encode(bytesDataOutput.toDataInput(), bytesDataOutput.size());
output.writeVLong(encodedBytes.size());
encodedBytes.writeTo(output);
@@ -84,7 +85,7 @@ public class FSTDictionary implements IndexDictionary {
* Reads a {@link FSTDictionary} from the provided input.
* @param blockDecoder The {@link BlockDecoder} to use for specific decoding; or null if none.
*/
protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder) throws IOException {
protected static FSTDictionary read(DataInput input, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
DataInput fstDataInput;
if (blockDecoder == null) {
fstDataInput = input;
@@ -92,10 +93,14 @@ public class FSTDictionary implements IndexDictionary {
long numBytes = input.readVLong();
BytesRef decodedBytes = blockDecoder.decode(input, numBytes);
fstDataInput = new ByteArrayDataInput(decodedBytes.bytes, 0, decodedBytes.length);
// OffHeapFSTStore.init() requires a DataInput which is an instance of IndexInput.
// When the block is decoded we must load the FST on heap.
isFSTOnHeap = true;
}
PositiveIntOutputs fstOutputs = PositiveIntOutputs.getSingleton();
FST<Long> dictionary = new FST<>(fstDataInput, fstOutputs);
return new FSTDictionary(dictionary);
FST<Long> fst = isFSTOnHeap ? new FST<>(fstDataInput, fstOutputs)
: new FST<>(fstDataInput, fstOutputs, new OffHeapFSTStore());
return new FSTDictionary(fst);
}
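The heart of the change is the conditional above: when the dictionary block is not encoded, the FST can be backed by an OffHeapFSTStore, which reads its bytes lazily from the terms-dictionary file instead of copying them onto the Java heap. A minimal sketch of that decision, using the same FST constructors as the diff (the helper class and method name are hypothetical):

import java.io.IOException;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.OffHeapFSTStore;
import org.apache.lucene.util.fst.PositiveIntOutputs;

// Hypothetical helper mirroring FSTDictionary.read(): off-heap loading needs an IndexInput,
// so a decoded block (a ByteArrayDataInput over heap bytes) can only be loaded on-heap.
final class FSTLoadingSketch {
  static FST<Long> loadFST(DataInput in, boolean forceOnHeap) throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
    if (forceOnHeap || !(in instanceof IndexInput)) {
      return new FST<>(in, outputs);                      // FST bytes are copied onto the Java heap
    }
    return new FST<>(in, outputs, new OffHeapFSTStore()); // FST bytes stay in the index file
  }
}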
@Override
@@ -109,7 +114,7 @@ public class FSTDictionary implements IndexDictionary {
*/
protected class Browser implements IndexDictionary.Browser {
protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(dictionary);
protected final BytesRefFSTEnum<Long> fstEnum = new BytesRefFSTEnum<>(fst);
@Override
public long seekBlock(BytesRef term) throws IOException {
@@ -127,16 +132,19 @@ public class FSTDictionary implements IndexDictionary {
protected final IndexInput dictionaryInput;
protected final BlockDecoder blockDecoder;
protected final boolean isFSTOnHeap;
/**
* Lazy loaded immutable index dictionary (trie hold in RAM).
* Lazy loaded immutable index dictionary FST.
* The FST is either kept off-heap, or held in RAM on-heap.
*/
protected IndexDictionary dictionary;
public BrowserSupplier(IndexInput dictionaryInput, long startFilePointer, BlockDecoder blockDecoder) throws IOException {
public BrowserSupplier(IndexInput dictionaryInput, long dictionaryStartFP, BlockDecoder blockDecoder, boolean isFSTOnHeap) throws IOException {
this.dictionaryInput = dictionaryInput.clone();
this.dictionaryInput.seek(startFilePointer);
this.dictionaryInput.seek(dictionaryStartFP);
this.blockDecoder = blockDecoder;
this.isFSTOnHeap = isFSTOnHeap;
}
@Override
@@ -147,7 +155,7 @@ public class FSTDictionary implements IndexDictionary {
if (dictionary == null) {
synchronized (this) {
if (dictionary == null) {
dictionary = read(dictionaryInput, blockDecoder);
dictionary = read(dictionaryInput, blockDecoder, isFSTOnHeap);
}
}
}
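The supplier above defers reading the dictionary until a Browser is first requested, using double-checked locking so the (possibly off-heap) FST is read at most once even when several threads seek concurrently. A generic sketch of that idiom, not the Lucene class itself (readDictionary is a hypothetical stand-in for FSTDictionary.read; in a standalone implementation the lazily-set field should be volatile for safe publication):

import java.io.IOException;

import org.apache.lucene.codecs.uniformsplit.IndexDictionary;

final class LazyDictionarySketch {
  private volatile IndexDictionary dictionary; // written at most once, read by many threads

  IndexDictionary get() throws IOException {
    if (dictionary == null) {              // fast path: no locking once initialized
      synchronized (this) {
        if (dictionary == null) {          // re-check under the lock
          dictionary = readDictionary();   // expensive: reads (or memory-maps) the FST
        }
      }
    }
    return dictionary;
  }

  private IndexDictionary readDictionary() throws IOException {
    throw new UnsupportedOperationException("sketch only"); // hypothetical loader
  }
}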


@@ -59,31 +59,38 @@ public class FieldMetadata implements Accountable {
protected BytesRef lastTerm;
/**
* Constructs a {@link FieldMetadata} used for writing the index. This {@link FieldMetadata} is mutable.
*
* Constructs field metadata for writing.
* @param maxDoc The total number of documents in the segment being written.
*/
public FieldMetadata(FieldInfo fieldInfo, int maxDoc) {
this(fieldInfo, maxDoc, true);
}
public FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) {
this(fieldInfo, maxDoc, isMutable, -1, -1, null);
/**
* Constructs immutable virtual field metadata for reading.
*/
public FieldMetadata(long dictionaryStartFP, long firstBlockStartFP, long lastBlockStartFP, BytesRef lastTerm) {
this(null, 0, false);
this.dictionaryStartFP = dictionaryStartFP;
this.firstBlockStartFP = firstBlockStartFP;
this.lastBlockStartFP = lastBlockStartFP;
this.lastTerm = lastTerm;
}
/**
* Constructs field metadata for reading or writing.
* @param maxDoc The total number of documents in the segment being written.
* @param isMutable Set true if this FieldMetadata is created for writing the index. Set false if it is used for reading the index.
*/
public FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable, long firstBlockStartFP, long lastBlockStartFP, BytesRef lastTerm) {
protected FieldMetadata(FieldInfo fieldInfo, int maxDoc, boolean isMutable) {
assert isMutable || maxDoc == 0;
this.fieldInfo = fieldInfo;
this.isMutable = isMutable;
// docsSeen must not be set if this FieldMetadata is immutable, that means it is used for reading the index.
this.docsSeen = isMutable ? new FixedBitSet(maxDoc) : null;
this.dictionaryStartFP = -1;
this.firstBlockStartFP = firstBlockStartFP;
this.lastBlockStartFP = lastBlockStartFP;
this.lastTerm = lastTerm;
this.firstBlockStartFP = -1;
this.lastBlockStartFP = -1;
}
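To summarize the constructor split above: the public two-argument constructor remains for the write path (mutable, allocating docsSeen), while the new public constructor builds an immutable, field-less metadata for the read path, as used by the shared-terms union later in this commit. A sketch of the two paths (helper class and argument names are hypothetical):

import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.util.BytesRef;

final class FieldMetadataPathsSketch {
  // Write path: mutable metadata per field, tracking docsSeen and block/dictionary file pointers.
  static FieldMetadata forWriting(FieldInfo fieldInfo, int maxDoc) {
    return new FieldMetadata(fieldInfo, maxDoc);
  }

  // Read path: immutable "virtual" metadata carrying only file pointers and the last term.
  static FieldMetadata forReading(long dictionaryStartFP, long firstBlockStartFP,
                                  long lastBlockStartFP, BytesRef lastTerm) {
    return new FieldMetadata(dictionaryStartFP, firstBlockStartFP, lastBlockStartFP, lastTerm);
  }
}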
/**


@@ -112,5 +112,6 @@ public interface IndexDictionary extends Accountable {
* {@link org.apache.lucene.index.TermsEnum#seekExact} are called (it is not loaded for a direct
* all-terms enumeration).
*/
interface BrowserSupplier extends IOSupplier<Browser>, Accountable {}
interface BrowserSupplier extends IOSupplier<Browser>, Accountable {
}
}


@@ -55,9 +55,14 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
protected final int deltaNumLines;
protected final BlockEncoder blockEncoder;
protected final BlockDecoder blockDecoder;
protected final boolean dictionaryOnHeap;
/**
* Creates a {@link UniformSplitPostingsFormat} with default settings.
*/
public UniformSplitPostingsFormat() {
this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES, null, null);
this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, false);
}
/**
@@ -73,12 +78,20 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
* It can be used for compression or encryption.
* @param blockDecoder Optional block decoder, may be null if none.
* It can be used for compression or encryption.
* @param dictionaryOnHeap Whether to force loading the terms dictionary on-heap. By default it is kept off-heap without
* impact on performance. If block encoding/decoding is used, then the dictionary is always
* loaded on-heap whatever this parameter value is.
*/
public UniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder);
public UniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder,
boolean dictionaryOnHeap) {
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
}
protected UniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
/**
* @see #UniformSplitPostingsFormat(int, int, BlockEncoder, BlockDecoder, boolean)
*/
protected UniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
BlockDecoder blockDecoder, boolean dictionaryOnHeap) {
super(name);
UniformSplitTermsWriter.validateSettings(targetNumBlockLines, deltaNumLines);
validateBlockEncoder(blockEncoder, blockDecoder);
@@ -86,6 +99,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
this.deltaNumLines = deltaNumLines;
this.blockEncoder = blockEncoder;
this.blockDecoder = blockDecoder;
this.dictionaryOnHeap = dictionaryOnHeap;
}
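For existing callers nothing changes: the no-arg constructor now simply keeps the dictionary FST off-heap. Forcing it on-heap goes through the expert constructor. A minimal usage sketch (the Lucene84Codec subclass and StandardAnalyzer are assumptions for illustration, not part of this commit):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene84.Lucene84Codec;
import org.apache.lucene.codecs.uniformsplit.UniformSplitPostingsFormat;
import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsWriter;
import org.apache.lucene.index.IndexWriterConfig;

final class UniformSplitConfigSketch {
  static IndexWriterConfig newConfig(boolean dictionaryOnHeap) {
    PostingsFormat postingsFormat = new UniformSplitPostingsFormat(
        UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES,
        UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
        null, null,          // no block encoder/decoder
        dictionaryOnHeap);   // false (the default behavior): dictionary FST stays off-heap
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    config.setCodec(new Lucene84Codec() {
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        return postingsFormat; // use UniformSplit for every field
      }
    });
    return config;
  }
}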
@Override
@@ -125,7 +139,7 @@ public class UniformSplitPostingsFormat extends PostingsFormat {
protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException {
return new UniformSplitTermsReader(postingsReader, state, blockDecoder);
return new UniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
}
private static void validateBlockEncoder(BlockEncoder blockEncoder, BlockDecoder blockDecoder) {


@@ -49,15 +49,6 @@ public class UniformSplitTerms extends Terms implements Accountable {
protected final BlockDecoder blockDecoder;
protected final IndexDictionary.BrowserSupplier dictionaryBrowserSupplier;
/**
* @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption.
*/
protected UniformSplitTerms(IndexInput dictionaryInput, IndexInput blockInput, FieldMetadata fieldMetadata,
PostingsReaderBase postingsReader, BlockDecoder blockDecoder) throws IOException {
this(blockInput, fieldMetadata, postingsReader, blockDecoder,
new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadata.getDictionaryStartFP(), blockDecoder));
}
/**
* @param blockDecoder Optional block decoder, may be null if none. It can be used for decompression or decryption.
*/


@@ -65,21 +65,24 @@ public class UniformSplitTermsReader extends FieldsProducer {
protected final Collection<String> sortedFieldNames;
/**
* @param blockDecoder Optional block decoder, may be null if none.
* It can be used for decompression or decryption.
* @param blockDecoder Optional block decoder, may be null if none.
* It can be used for decompression or decryption.
* @param dictionaryOnHeap Whether to force loading the terms dictionary on-heap. By default it is kept off-heap without
* impact on performance. If block encoding/decoding is used, then the dictionary is always
* loaded on-heap whatever this parameter value is.
*/
public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
public UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap) throws IOException {
this(postingsReader, state, blockDecoder, dictionaryOnHeap, FieldMetadata.Serializer.INSTANCE, NAME, VERSION_START, VERSION_CURRENT,
TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
/**
* @param blockDecoder Optional block decoder, may be null if none.
* It can be used for decompression or decryption.
* @see #UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
*/
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent,
protected UniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException {
IndexInput dictionaryInput = null;
IndexInput blockInput = null;
@@ -108,7 +111,7 @@ public class UniformSplitTermsReader extends FieldsProducer {
this.blockInput = blockInput;
this.dictionaryInput = dictionaryInput;
fillFieldMap(postingsReader, blockDecoder, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos);
fillFieldMap(postingsReader, state, blockDecoder, dictionaryOnHeap, dictionaryInput, blockInput, fieldMetadataCollection, state.fieldInfos);
List<String> fieldNames = new ArrayList<>(fieldToTermsMap.keySet());
Collections.sort(fieldNames);
@@ -122,15 +125,21 @@ public class UniformSplitTermsReader extends FieldsProducer {
}
}
protected void fillFieldMap(PostingsReaderBase postingsReader, BlockDecoder blockDecoder,
IndexInput dictionaryInput, IndexInput blockInput,
Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
protected void fillFieldMap(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap, IndexInput dictionaryInput, IndexInput blockInput,
Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = createDictionaryBrowserSupplier(state, dictionaryInput, fieldMetadata, blockDecoder, dictionaryOnHeap);
fieldToTermsMap.put(fieldMetadata.getFieldInfo().name,
new UniformSplitTerms(dictionaryInput, blockInput, fieldMetadata, postingsReader, blockDecoder));
new UniformSplitTerms(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier));
}
}
protected IndexDictionary.BrowserSupplier createDictionaryBrowserSupplier(SegmentReadState state, IndexInput dictionaryInput, FieldMetadata fieldMetadata,
BlockDecoder blockDecoder, boolean dictionaryOnHeap) throws IOException {
return new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadata.getDictionaryStartFP(), blockDecoder, dictionaryOnHeap);
}
/**
* @param indexInput {@link IndexInput} must be positioned to the fields metadata
* details by calling {@link #seekFieldsMetadata(IndexInput)} before this call.


@@ -58,16 +58,25 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {
public static final String NAME = "SharedTermsUniformSplit";
/**
* Creates a {@link STUniformSplitPostingsFormat} with default settings.
*/
public STUniformSplitPostingsFormat() {
this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES, null, null);
this(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, false);
}
public STUniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder);
/**
* @see UniformSplitPostingsFormat#UniformSplitPostingsFormat(int, int, BlockEncoder, BlockDecoder, boolean)
*/
public STUniformSplitPostingsFormat(int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder,
boolean dictionaryOnHeap) {
this(NAME, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
}
protected STUniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder, BlockDecoder blockDecoder) {
super(name, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder);
protected STUniformSplitPostingsFormat(String name, int targetNumBlockLines, int deltaNumLines, BlockEncoder blockEncoder,
BlockDecoder blockDecoder, boolean dictionaryOnHeap) {
super(name, targetNumBlockLines, deltaNumLines, blockEncoder, blockDecoder, dictionaryOnHeap);
}
@Override
@@ -79,6 +88,6 @@ public class STUniformSplitPostingsFormat extends UniformSplitPostingsFormat {
@Override
protected FieldsProducer createUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder) throws IOException {
return new STUniformSplitTermsReader(postingsReader, state, blockDecoder);
return new STUniformSplitTermsReader(postingsReader, state, blockDecoder, dictionaryOnHeap);
}
}


@@ -43,7 +43,8 @@ public class STUniformSplitTerms extends UniformSplitTerms {
protected STUniformSplitTerms(IndexInput blockInput, FieldMetadata fieldMetadata,
FieldMetadata unionFieldMetadata, PostingsReaderBase postingsReader,
BlockDecoder blockDecoder, FieldInfos fieldInfos, IndexDictionary.BrowserSupplier dictionaryBrowserSupplier) {
BlockDecoder blockDecoder, FieldInfos fieldInfos,
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier) {
super(blockInput, fieldMetadata, postingsReader, blockDecoder, dictionaryBrowserSupplier);
this.unionFieldMetadata = unionFieldMetadata;
this.fieldInfos = fieldInfos;


@@ -22,7 +22,6 @@ import java.util.Collection;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.uniformsplit.BlockDecoder;
import org.apache.lucene.codecs.uniformsplit.FSTDictionary;
import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
import org.apache.lucene.codecs.uniformsplit.UniformSplitTerms;
@@ -46,26 +45,33 @@ import static org.apache.lucene.codecs.uniformsplit.sharedterms.STUniformSplitPo
*/
public class STUniformSplitTermsReader extends UniformSplitTermsReader {
public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder) throws IOException {
this(postingsReader, state, blockDecoder, FieldMetadata.Serializer.INSTANCE,
/**
* @see UniformSplitTermsReader#UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
*/
public STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap) throws IOException {
this(postingsReader, state, blockDecoder, dictionaryOnHeap, FieldMetadata.Serializer.INSTANCE,
NAME, VERSION_START, VERSION_CURRENT, TERMS_BLOCKS_EXTENSION, TERMS_DICTIONARY_EXTENSION);
}
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state,
BlockDecoder blockDecoder, FieldMetadata.Serializer fieldMetadataReader,
/**
* @see UniformSplitTermsReader#UniformSplitTermsReader(PostingsReaderBase, SegmentReadState, BlockDecoder, boolean)
*/
protected STUniformSplitTermsReader(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap, FieldMetadata.Serializer fieldMetadataReader,
String codecName, int versionStart, int versionCurrent,
String termsBlocksExtension, String dictionaryExtension) throws IOException {
super(postingsReader, state, blockDecoder, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
super(postingsReader, state, blockDecoder, dictionaryOnHeap, fieldMetadataReader, codecName, versionStart, versionCurrent, termsBlocksExtension, dictionaryExtension);
}
@Override
protected void fillFieldMap(PostingsReaderBase postingsReader, BlockDecoder blockDecoder,
IndexInput dictionaryInput, IndexInput blockInput,
protected void fillFieldMap(PostingsReaderBase postingsReader, SegmentReadState state, BlockDecoder blockDecoder,
boolean dictionaryOnHeap, IndexInput dictionaryInput, IndexInput blockInput,
Collection<FieldMetadata> fieldMetadataCollection, FieldInfos fieldInfos) throws IOException {
if (!fieldMetadataCollection.isEmpty()) {
FieldMetadata unionFieldMetadata = createUnionFieldMetadata(fieldMetadataCollection);
// Share the same immutable dictionary between all fields.
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = new FSTDictionary.BrowserSupplier(dictionaryInput, fieldMetadataCollection.iterator().next().getDictionaryStartFP(), blockDecoder);
IndexDictionary.BrowserSupplier dictionaryBrowserSupplier = createDictionaryBrowserSupplier(state, dictionaryInput, unionFieldMetadata, blockDecoder, dictionaryOnHeap);
for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
fieldToTermsMap.put(fieldMetadata.getFieldInfo().name,
new STUniformSplitTerms(blockInput, fieldMetadata, unionFieldMetadata, postingsReader, blockDecoder, fieldInfos, dictionaryBrowserSupplier));


@@ -27,6 +27,7 @@ import org.apache.lucene.util.BytesRef;
*/
public class UnionFieldMetadataBuilder {
private long dictionaryStartFP;
private long minStartBlockFP;
private long maxEndBlockFP;
private BytesRef maxLastTerm;
@@ -36,13 +37,16 @@ public class UnionFieldMetadataBuilder {
}
public UnionFieldMetadataBuilder reset() {
maxEndBlockFP = Long.MIN_VALUE;
dictionaryStartFP = -1;
minStartBlockFP = Long.MAX_VALUE;
maxEndBlockFP = Long.MIN_VALUE;
maxLastTerm = null;
return this;
}
public UnionFieldMetadataBuilder addFieldMetadata(FieldMetadata fieldMetadata) {
assert dictionaryStartFP == -1 || dictionaryStartFP == fieldMetadata.getDictionaryStartFP();
dictionaryStartFP = fieldMetadata.getDictionaryStartFP();
minStartBlockFP = Math.min(minStartBlockFP, fieldMetadata.getFirstBlockStartFP());
maxEndBlockFP = Math.max(maxEndBlockFP, fieldMetadata.getLastBlockStartFP());
if (maxLastTerm == null || maxLastTerm.compareTo(fieldMetadata.getLastTerm()) < 0) {
@@ -55,6 +59,6 @@ public class UnionFieldMetadataBuilder {
if (maxLastTerm == null) {
throw new IllegalStateException("no field metadata was provided");
}
return new FieldMetadata(null, 0, false, minStartBlockFP, maxEndBlockFP, maxLastTerm);
return new FieldMetadata(dictionaryStartFP, minStartBlockFP, maxEndBlockFP, maxLastTerm);
}
}
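For context, this builder is driven by the shared-terms reader when it computes the single union metadata spanning all fields; accumulating dictionaryStartFP here is what lets the reader hand the union metadata to createDictionaryBrowserSupplier instead of an arbitrary field's metadata. A sketch of the presumed call pattern (the wrapper method is hypothetical):

import java.util.Collection;

import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.codecs.uniformsplit.sharedterms.UnionFieldMetadataBuilder;

final class UnionMetadataSketch {
  static FieldMetadata unionOf(Collection<FieldMetadata> fieldMetadataCollection) {
    UnionFieldMetadataBuilder builder = new UnionFieldMetadataBuilder();
    for (FieldMetadata fieldMetadata : fieldMetadataCollection) {
      builder.addFieldMetadata(fieldMetadata); // asserts every field shares the same dictionary start FP
    }
    return builder.build(); // min first-block FP, max last-block FP, greatest last term
  }
}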


@@ -140,6 +140,7 @@ public class TestFSTDictionary extends LuceneTestCase {
private static FSTDictionary serializeAndReadDictionary(FSTDictionary srcDictionary, boolean shouldEncrypt) throws IOException {
ByteBuffersDataOutput output = ByteBuffersDataOutput.newResettableInstance();
srcDictionary.write(output, shouldEncrypt ? Rot13CypherTestUtil.getBlockEncoder() : null);
return FSTDictionary.read(output.toDataInput(), shouldEncrypt ? Rot13CypherTestUtil.getBlockDecoder() : null);
// We must load the FST on-heap since we use a ByteBuffersDataInput which is not an instance of IndexInput.
return FSTDictionary.read(output.toDataInput(), shouldEncrypt ? Rot13CypherTestUtil.getBlockDecoder() : null, true);
}
}


@@ -18,6 +18,7 @@
package org.apache.lucene.codecs.uniformsplit;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.After;
@@ -28,10 +29,21 @@ import org.junit.Before;
*/
public class TestUniformSplitPostingFormat extends BasePostingsFormatTestCase {
private final Codec codec = TestUtil.alwaysPostingsFormat(new UniformSplitRot13PostingsFormat());
protected final boolean checkEncoding;
protected final Codec codec;
private boolean shouldCheckDecoderWasCalled = true;
public TestUniformSplitPostingFormat() {
checkEncoding = random().nextBoolean();
codec = TestUtil.alwaysPostingsFormat(getPostingsFormat());
}
protected PostingsFormat getPostingsFormat() {
return checkEncoding ? new UniformSplitRot13PostingsFormat()
: new UniformSplitPostingsFormat(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, random().nextBoolean());
}
@Override
protected Codec getCodec() {
return codec;
@@ -44,10 +56,12 @@ public class TestUniformSplitPostingFormat extends BasePostingsFormatTestCase {
@After
public void checkEncodingCalled() {
assertTrue(UniformSplitRot13PostingsFormat.blocksEncoded);
assertTrue(UniformSplitRot13PostingsFormat.dictionaryEncoded);
if (shouldCheckDecoderWasCalled) {
assertTrue(UniformSplitRot13PostingsFormat.decoderCalled);
if (checkEncoding) {
assertTrue(UniformSplitRot13PostingsFormat.blocksEncoded);
assertTrue(UniformSplitRot13PostingsFormat.dictionaryEncoded);
if (shouldCheckDecoderWasCalled) {
assertTrue(UniformSplitRot13PostingsFormat.decoderCalled);
}
}
}


@@ -17,19 +17,19 @@
package org.apache.lucene.codecs.uniformsplit.sharedterms;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.uniformsplit.TestUniformSplitPostingFormat;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.codecs.uniformsplit.UniformSplitTermsWriter;
/**
* Tests {@link STUniformSplitPostingsFormat} with block encoding using ROT13 cypher.
*/
public class TestSTUniformSplitPostingFormat extends TestUniformSplitPostingFormat {
private final Codec codec = TestUtil.alwaysPostingsFormat(new STUniformSplitRot13PostingsFormat());
@Override
protected Codec getCodec() {
return codec;
protected PostingsFormat getPostingsFormat() {
return checkEncoding ? new STUniformSplitRot13PostingsFormat()
: new STUniformSplitPostingsFormat(UniformSplitTermsWriter.DEFAULT_TARGET_NUM_BLOCK_LINES, UniformSplitTermsWriter.DEFAULT_DELTA_NUM_LINES,
null, null, random().nextBoolean());
}
}


@@ -41,13 +41,15 @@ public class UniformSplitRot13PostingsFormat extends PostingsFormat {
public static volatile boolean decoderCalled;
public static volatile boolean blocksEncoded;
public static volatile boolean dictionaryEncoded;
protected final boolean dictionaryOnHeap;
public UniformSplitRot13PostingsFormat() {
this("UniformSplitRot13");
this("UniformSplitRot13", false);
}
protected UniformSplitRot13PostingsFormat(String name) {
protected UniformSplitRot13PostingsFormat(String name, boolean dictionaryOnHeap) {
super(name);
this.dictionaryOnHeap = dictionaryOnHeap;
}
public static void resetEncodingFlags() {
@@ -135,7 +137,7 @@ public class UniformSplitRot13PostingsFormat extends PostingsFormat {
}
protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException {
return new UniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder());
return new UniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder(), dictionaryOnHeap);
}
protected BlockDecoder getBlockDecoder() {


@@ -35,7 +35,7 @@ import org.apache.lucene.index.SegmentWriteState;
public class STUniformSplitRot13PostingsFormat extends UniformSplitRot13PostingsFormat {
public STUniformSplitRot13PostingsFormat() {
super("STUniformSplitRot13");
super("STUniformSplitRot13", false);
}
protected FieldsConsumer createFieldsConsumer(SegmentWriteState segmentWriteState, PostingsWriterBase postingsWriter) throws IOException {
@@ -54,6 +54,6 @@ public class STUniformSplitRot13PostingsFormat extends UniformSplitRot13Postings
}
protected FieldsProducer createFieldsProducer(SegmentReadState segmentReadState, PostingsReaderBase postingsReader) throws IOException {
return new STUniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder());
return new STUniformSplitTermsReader(postingsReader, segmentReadState, getBlockDecoder(), dictionaryOnHeap);
}
}