Refactor Lucene95 to allow off heap vector reader reuse (#12629)

While going through https://github.com/apache/lucene/pull/12582, I noticed that our off-heap vector readers haven't changed at all for a while now; we just keep copying them around for no reason. To make adding a new vector codec simpler, this refactors the lucene95 codec so that its off-heap vector storage format (readers/writers) can be reused by other codecs. Additionally, it extracts the reading and writing of the sparse-vector fields so that they can be read from, and written to, caller-provided index inputs/outputs. This should significantly reduce the churn in new codecs.
2023-10-10 11:53:54 -07:00 · 2023-10-10 11:53:54 -07:00 · 05d26ac44d
parent 04f38dd288
commit 05d26ac44d
5 changed files with 293 additions and 126 deletions
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsReader.java
@ -248,7 +248,13 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
              + " expected: "
              + VectorEncoding.FLOAT32);
    }
-    return OffHeapFloatVectorValues.load(fieldEntry, vectorData);
+    return OffHeapFloatVectorValues.load(
+        fieldEntry.ordToDocVectorValues,
+        fieldEntry.vectorEncoding,
+        fieldEntry.dimension,
+        fieldEntry.vectorDataOffset,
+        fieldEntry.vectorDataLength,
+        vectorData);
  }

  @Override
@ -263,7 +269,13 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
              + " expected: "
              + VectorEncoding.FLOAT32);
    }
-    return OffHeapByteVectorValues.load(fieldEntry, vectorData);
+    return OffHeapByteVectorValues.load(
+        fieldEntry.ordToDocVectorValues,
+        fieldEntry.vectorEncoding,
+        fieldEntry.dimension,
+        fieldEntry.vectorDataOffset,
+        fieldEntry.vectorDataLength,
+        vectorData);
  }

  @Override
@ -277,7 +289,14 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
      return;
    }

-    OffHeapFloatVectorValues vectorValues = OffHeapFloatVectorValues.load(fieldEntry, vectorData);
+    OffHeapFloatVectorValues vectorValues =
+        OffHeapFloatVectorValues.load(
+            fieldEntry.ordToDocVectorValues,
+            fieldEntry.vectorEncoding,
+            fieldEntry.dimension,
+            fieldEntry.vectorDataOffset,
+            fieldEntry.vectorDataLength,
+            vectorData);
    RandomVectorScorer scorer =
        RandomVectorScorer.createFloats(vectorValues, fieldEntry.similarityFunction, target);
    HnswGraphSearcher.search(
@ -298,7 +317,14 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
      return;
    }

-    OffHeapByteVectorValues vectorValues = OffHeapByteVectorValues.load(fieldEntry, vectorData);
+    OffHeapByteVectorValues vectorValues =
+        OffHeapByteVectorValues.load(
+            fieldEntry.ordToDocVectorValues,
+            fieldEntry.vectorEncoding,
+            fieldEntry.dimension,
+            fieldEntry.vectorDataOffset,
+            fieldEntry.vectorDataLength,
+            vectorData);
    RandomVectorScorer scorer =
        RandomVectorScorer.createBytes(vectorValues, fieldEntry.similarityFunction, target);
    HnswGraphSearcher.search(
@ -352,22 +378,9 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
    final int offsetsBlockShift;
    final long offsetsLength;

-    // the following four variables used to read docIds encoded by IndexDISI
-    // special values of docsWithFieldOffset are -1 and -2
-    // -1 : dense
-    // -2 : empty
-    // other: sparse
-    final long docsWithFieldOffset;
-    final long docsWithFieldLength;
-    final short jumpTableEntryCount;
-    final byte denseRankPower;
-
-    // the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
-    // note that only spare case needs to store ordToDoc
-    final long addressesOffset;
-    final int blockShift;
-    final DirectMonotonicReader.Meta meta;
-    final long addressesLength;
+    // Contains the configuration for reading sparse vectors and translating vector ordinals to
+    // docId
+    OrdToDocDISIReaderConfiguration ordToDocVectorValues;

    FieldEntry(
        IndexInput input,
@ -383,24 +396,7 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
      dimension = input.readVInt();
      size = input.readInt();

-      docsWithFieldOffset = input.readLong();
-      docsWithFieldLength = input.readLong();
-      jumpTableEntryCount = input.readShort();
-      denseRankPower = input.readByte();
-
-      // dense or empty
-      if (docsWithFieldOffset == -1 || docsWithFieldOffset == -2) {
-        addressesOffset = 0;
-        blockShift = 0;
-        meta = null;
-        addressesLength = 0;
-      } else {
-        // sparse
-        addressesOffset = input.readLong();
-        blockShift = input.readVInt();
-        meta = DirectMonotonicReader.loadMeta(input, size, blockShift);
-        addressesLength = input.readLong();
-      }
+      ordToDocVectorValues = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size);

      // read nodes by level
      M = input.readVInt();
@ -441,7 +437,7 @@ public final class Lucene95HnswVectorsReader extends KnnVectorsReader implements
    public long ramBytesUsed() {
      return SHALLOW_SIZE
          + Arrays.stream(nodesByLevel).mapToLong(nodes -> RamUsageEstimator.sizeOf(nodes)).sum()
-          + RamUsageEstimator.sizeOf(meta)
+          + RamUsageEstimator.sizeOf(ordToDocVectorValues)
          + RamUsageEstimator.sizeOf(offsetsMeta);
    }
  }
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/Lucene95HnswVectorsWriter.java
@ -34,7 +34,6 @@ import org.apache.lucene.codecs.HnswGraphProvider;
 import org.apache.lucene.codecs.KnnFieldVectorsWriter;
 import org.apache.lucene.codecs.KnnVectorsReader;
 import org.apache.lucene.codecs.KnnVectorsWriter;
-import org.apache.lucene.codecs.lucene90.IndexedDISI;
 import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.Sorter;
@ -727,43 +726,8 @@ public final class Lucene95HnswVectorsWriter extends KnnVectorsWriter {
    // write docIDs
    int count = docsWithField.cardinality();
    meta.writeInt(count);
-    if (count == 0) {
-      meta.writeLong(-2); // docsWithFieldOffset
-      meta.writeLong(0L); // docsWithFieldLength
-      meta.writeShort((short) -1); // jumpTableEntryCount
-      meta.writeByte((byte) -1); // denseRankPower
-    } else if (count == maxDoc) {
-      meta.writeLong(-1); // docsWithFieldOffset
-      meta.writeLong(0L); // docsWithFieldLength
-      meta.writeShort((short) -1); // jumpTableEntryCount
-      meta.writeByte((byte) -1); // denseRankPower
-    } else {
-      long offset = vectorData.getFilePointer();
-      meta.writeLong(offset); // docsWithFieldOffset
-      final short jumpTableEntryCount =
-          IndexedDISI.writeBitSet(
-              docsWithField.iterator(), vectorData, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-      meta.writeLong(vectorData.getFilePointer() - offset); // docsWithFieldLength
-      meta.writeShort(jumpTableEntryCount);
-      meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
-
-      // write ordToDoc mapping
-      long start = vectorData.getFilePointer();
-      meta.writeLong(start);
-      meta.writeVInt(DIRECT_MONOTONIC_BLOCK_SHIFT);
-      // dense case and empty case do not need to store ordToMap mapping
-      final DirectMonotonicWriter ordToDocWriter =
-          DirectMonotonicWriter.getInstance(meta, vectorData, count, DIRECT_MONOTONIC_BLOCK_SHIFT);
-      DocIdSetIterator iterator = docsWithField.iterator();
-      for (int doc = iterator.nextDoc();
-          doc != DocIdSetIterator.NO_MORE_DOCS;
-          doc = iterator.nextDoc()) {
-        ordToDocWriter.add(doc);
-      }
-      ordToDocWriter.finish();
-      meta.writeLong(vectorData.getFilePointer() - start);
-    }
-
+    OrdToDocDISIReaderConfiguration.writeStoredMeta(
+        DIRECT_MONOTONIC_BLOCK_SHIFT, meta, vectorData, count, maxDoc, docsWithField);
    meta.writeVInt(M);
    // write graph nodes on each level
    if (graph == null) {
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapByteVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapByteVectorValues.java
@ -29,7 +29,7 @@ import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
 import org.apache.lucene.util.packed.DirectMonotonicReader;

 /** Read the vector values from the index input. This supports both iterated and random access. */
-abstract class OffHeapByteVectorValues extends ByteVectorValues
+public abstract class OffHeapByteVectorValues extends ByteVectorValues
    implements RandomAccessVectorValues<byte[]> {

  protected final int dimension;
@ -73,19 +73,24 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
    slice.readBytes(byteBuffer.array(), byteBuffer.arrayOffset(), byteSize);
  }

-  static OffHeapByteVectorValues load(
-      Lucene95HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
-    if (fieldEntry.docsWithFieldOffset == -2 || fieldEntry.vectorEncoding != VectorEncoding.BYTE) {
-      return new EmptyOffHeapVectorValues(fieldEntry.dimension);
+  public static OffHeapByteVectorValues load(
+      OrdToDocDISIReaderConfiguration configuration,
+      VectorEncoding vectorEncoding,
+      int dimension,
+      long vectorDataOffset,
+      long vectorDataLength,
+      IndexInput vectorData)
+      throws IOException {
+    if (configuration.docsWithFieldOffset == -2 || vectorEncoding != VectorEncoding.BYTE) {
+      return new EmptyOffHeapVectorValues(dimension);
    }
-    IndexInput bytesSlice =
-        vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
-    int byteSize = fieldEntry.dimension;
-    if (fieldEntry.docsWithFieldOffset == -1) {
-      return new DenseOffHeapVectorValues(
-          fieldEntry.dimension, fieldEntry.size, bytesSlice, byteSize);
+    IndexInput bytesSlice = vectorData.slice("vector-data", vectorDataOffset, vectorDataLength);
+    int byteSize = dimension;
+    if (configuration.docsWithFieldOffset == -1) {
+      return new DenseOffHeapVectorValues(dimension, configuration.size, bytesSlice, byteSize);
    } else {
-      return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice, byteSize);
+      return new SparseOffHeapVectorValues(
+          configuration, vectorData, bytesSlice, dimension, byteSize);
    }
  }

@ -139,29 +144,30 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues
    private final IndexedDISI disi;
    // dataIn was used to init a new IndexedDIS for #randomAccess()
    private final IndexInput dataIn;
-    private final Lucene95HnswVectorsReader.FieldEntry fieldEntry;
+    private final OrdToDocDISIReaderConfiguration configuration;

    public SparseOffHeapVectorValues(
-        Lucene95HnswVectorsReader.FieldEntry fieldEntry,
+        OrdToDocDISIReaderConfiguration configuration,
        IndexInput dataIn,
        IndexInput slice,
+        int dimension,
        int byteSize)
        throws IOException {

-      super(fieldEntry.dimension, fieldEntry.size, slice, byteSize);
-      this.fieldEntry = fieldEntry;
+      super(dimension, configuration.size, slice, byteSize);
+      this.configuration = configuration;
      final RandomAccessInput addressesData =
-          dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
+          dataIn.randomAccessSlice(configuration.addressesOffset, configuration.addressesLength);
      this.dataIn = dataIn;
-      this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
+      this.ordToDoc = DirectMonotonicReader.getInstance(configuration.meta, addressesData);
      this.disi =
          new IndexedDISI(
              dataIn,
-              fieldEntry.docsWithFieldOffset,
-              fieldEntry.docsWithFieldLength,
-              fieldEntry.jumpTableEntryCount,
-              fieldEntry.denseRankPower,
-              fieldEntry.size);
+              configuration.docsWithFieldOffset,
+              configuration.docsWithFieldLength,
+              configuration.jumpTableEntryCount,
+              configuration.denseRankPower,
+              configuration.size);
    }

    @Override
@ -187,7 +193,8 @@ abstract class OffHeapByteVectorValues extends ByteVectorValues

    @Override
    public RandomAccessVectorValues<byte[]> copy() throws IOException {
-      return new SparseOffHeapVectorValues(fieldEntry, dataIn, slice.clone(), byteSize);
+      return new SparseOffHeapVectorValues(
+          configuration, dataIn, slice.clone(), dimension, byteSize);
    }

    @Override
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapFloatVectorValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OffHeapFloatVectorValues.java
@ -28,7 +28,7 @@ import org.apache.lucene.util.hnsw.RandomAccessVectorValues;
 import org.apache.lucene.util.packed.DirectMonotonicReader;

 /** Read the vector values from the index input. This supports both iterated and random access. */
-abstract class OffHeapFloatVectorValues extends FloatVectorValues
+public abstract class OffHeapFloatVectorValues extends FloatVectorValues
    implements RandomAccessVectorValues<float[]> {

  protected final int dimension;
@ -67,20 +67,24 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
    return value;
  }

-  static OffHeapFloatVectorValues load(
-      Lucene95HnswVectorsReader.FieldEntry fieldEntry, IndexInput vectorData) throws IOException {
-    if (fieldEntry.docsWithFieldOffset == -2
-        || fieldEntry.vectorEncoding != VectorEncoding.FLOAT32) {
-      return new EmptyOffHeapVectorValues(fieldEntry.dimension);
+  public static OffHeapFloatVectorValues load(
+      OrdToDocDISIReaderConfiguration configuration,
+      VectorEncoding vectorEncoding,
+      int dimension,
+      long vectorDataOffset,
+      long vectorDataLength,
+      IndexInput vectorData)
+      throws IOException {
+    if (configuration.docsWithFieldOffset == -2 || vectorEncoding != VectorEncoding.FLOAT32) {
+      return new EmptyOffHeapVectorValues(dimension);
    }
-    IndexInput bytesSlice =
-        vectorData.slice("vector-data", fieldEntry.vectorDataOffset, fieldEntry.vectorDataLength);
-    int byteSize = fieldEntry.dimension * Float.BYTES;
-    if (fieldEntry.docsWithFieldOffset == -1) {
-      return new DenseOffHeapVectorValues(
-          fieldEntry.dimension, fieldEntry.size, bytesSlice, byteSize);
+    IndexInput bytesSlice = vectorData.slice("vector-data", vectorDataOffset, vectorDataLength);
+    int byteSize = dimension * Float.BYTES;
+    if (configuration.docsWithFieldOffset == -1) {
+      return new DenseOffHeapVectorValues(dimension, configuration.size, bytesSlice, byteSize);
    } else {
-      return new SparseOffHeapVectorValues(fieldEntry, vectorData, bytesSlice, byteSize);
+      return new SparseOffHeapVectorValues(
+          configuration, vectorData, bytesSlice, dimension, byteSize);
    }
  }

@ -134,29 +138,30 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues
    private final IndexedDISI disi;
    // dataIn was used to init a new IndexedDIS for #randomAccess()
    private final IndexInput dataIn;
-    private final Lucene95HnswVectorsReader.FieldEntry fieldEntry;
+    private final OrdToDocDISIReaderConfiguration configuration;

    public SparseOffHeapVectorValues(
-        Lucene95HnswVectorsReader.FieldEntry fieldEntry,
+        OrdToDocDISIReaderConfiguration configuration,
        IndexInput dataIn,
        IndexInput slice,
+        int dimension,
        int byteSize)
        throws IOException {

-      super(fieldEntry.dimension, fieldEntry.size, slice, byteSize);
-      this.fieldEntry = fieldEntry;
+      super(dimension, configuration.size, slice, byteSize);
+      this.configuration = configuration;
      final RandomAccessInput addressesData =
-          dataIn.randomAccessSlice(fieldEntry.addressesOffset, fieldEntry.addressesLength);
+          dataIn.randomAccessSlice(configuration.addressesOffset, configuration.addressesLength);
      this.dataIn = dataIn;
-      this.ordToDoc = DirectMonotonicReader.getInstance(fieldEntry.meta, addressesData);
+      this.ordToDoc = DirectMonotonicReader.getInstance(configuration.meta, addressesData);
      this.disi =
          new IndexedDISI(
              dataIn,
-              fieldEntry.docsWithFieldOffset,
-              fieldEntry.docsWithFieldLength,
-              fieldEntry.jumpTableEntryCount,
-              fieldEntry.denseRankPower,
-              fieldEntry.size);
+              configuration.docsWithFieldOffset,
+              configuration.docsWithFieldLength,
+              configuration.jumpTableEntryCount,
+              configuration.denseRankPower,
+              configuration.size);
    }

    @Override
@ -182,7 +187,8 @@ abstract class OffHeapFloatVectorValues extends FloatVectorValues

    @Override
    public RandomAccessVectorValues<float[]> copy() throws IOException {
-      return new SparseOffHeapVectorValues(fieldEntry, dataIn, slice.clone(), byteSize);
+      return new SparseOffHeapVectorValues(
+          configuration, dataIn, slice.clone(), dimension, byteSize);
    }

    @Override
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OrdToDocDISIReaderConfiguration.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene95/OrdToDocDISIReaderConfiguration.java
@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.codecs.lucene95;
+
+import java.io.IOException;
+import org.apache.lucene.codecs.lucene90.IndexedDISI;
+import org.apache.lucene.index.DocsWithFieldSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.Accountable;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.packed.DirectMonotonicReader;
+import org.apache.lucene.util.packed.DirectMonotonicWriter;
+
+/**
+ * Configuration for {@link DirectMonotonicReader} and {@link IndexedDISI} for reading sparse
+ * vectors. Instances are created by {@link #fromStoredMeta(IndexInput, int)} from the fields that
+ * {@link #writeStoredMeta(int, IndexOutput, IndexOutput, int, int, DocsWithFieldSet)} wrote. The
+ * format in the static writing methods adheres to the Lucene95HnswVectorsFormat.
+ */
+public class OrdToDocDISIReaderConfiguration implements Accountable {
+
+  private static final long SHALLOW_SIZE =
+      RamUsageEstimator.shallowSizeOfInstance(OrdToDocDISIReaderConfiguration.class);
+
+  /**
+   * Writes out the docsWithField and ordToDoc mapping to the outputMeta and vectorData
+   * respectively. This is in adherence to the Lucene95HnswVectorsFormat.
+   *
+   * <p>Within outputMeta the format is as follows:
+   *
+   * <ul>
+   *   <li><b>[int64]</b> docsWithFieldOffset: -2 if empty (no vector values), -1 if dense (all
+   *       documents have a value for the field), otherwise sparse (some documents are missing
+   *       values) and the value is the offset in vectorData at which the docIds start.
+   *   <li><b>[int64]</b> docsWithFieldLength, <b>[int16]</b> jumpTableEntryCount and
+   *       <b>[int8]</b> denseRankPower. These are 0, -1 and -1 in the empty and dense cases; in
+   *       the sparse case they describe the docIds encoded by {@link
+   *       IndexedDISI#writeBitSet(DocIdSetIterator, IndexOutput, byte)}.
+   *   <li>Sparse case only: <b>[int64]</b> offset of the ordToDoc data in vectorData,
+   *       <b>[vint]</b> directMonotonicBlockShift, whatever {@link
+   *       org.apache.lucene.util.packed.DirectMonotonicWriter} writes to its meta output, and
+   *       <b>[int64]</b> length of the ordToDoc data.
+   * </ul>
+   *
+   * <p>Within the vectorData the format is as follows:
+   *
+   * <ul>
+   *   <li>DocIds encoded by {@link IndexedDISI#writeBitSet(DocIdSetIterator, IndexOutput, byte)},
+   *       sparse case only
+   *   <li>OrdToDoc encoded by {@link org.apache.lucene.util.packed.DirectMonotonicWriter}, sparse
+   *       case only
+   * </ul>
+   *
+   * <p>Nothing is written to vectorData in the empty ({@code count == 0}) and dense ({@code count
+   * == maxDoc}) cases.
+   *
+   * @param directMonotonicBlockShift the block shift handed to the {@link
+   *     org.apache.lucene.util.packed.DirectMonotonicWriter} and recorded in outputMeta
+   * @param outputMeta the outputMeta
+   * @param vectorData the vectorData
+   * @param count the count of docs with vectors
+   * @param maxDoc the maxDoc for the index
+   * @param docsWithField the docs containing a vector field
+   * @throws IOException thrown when writing data fails to either output
+   */
+  public static void writeStoredMeta(
+      int directMonotonicBlockShift,
+      IndexOutput outputMeta,
+      IndexOutput vectorData,
+      int count,
+      int maxDoc,
+      DocsWithFieldSet docsWithField)
+      throws IOException {
+    if (count == 0) {
+      outputMeta.writeLong(-2); // docsWithFieldOffset
+      outputMeta.writeLong(0L); // docsWithFieldLength
+      outputMeta.writeShort((short) -1); // jumpTableEntryCount
+      outputMeta.writeByte((byte) -1); // denseRankPower
+    } else if (count == maxDoc) {
+      outputMeta.writeLong(-1); // docsWithFieldOffset
+      outputMeta.writeLong(0L); // docsWithFieldLength
+      outputMeta.writeShort((short) -1); // jumpTableEntryCount
+      outputMeta.writeByte((byte) -1); // denseRankPower
+    } else {
+      long offset = vectorData.getFilePointer();
+      outputMeta.writeLong(offset); // docsWithFieldOffset
+      final short jumpTableEntryCount =
+          IndexedDISI.writeBitSet(
+              docsWithField.iterator(), vectorData, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+      outputMeta.writeLong(vectorData.getFilePointer() - offset); // docsWithFieldLength
+      outputMeta.writeShort(jumpTableEntryCount);
+      outputMeta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
+
+      // write ordToDoc mapping; its start offset and block shift are recorded in the meta first
+      long start = vectorData.getFilePointer();
+      outputMeta.writeLong(start);
+      outputMeta.writeVInt(directMonotonicBlockShift);
+      // only the sparse case stores ordToDoc; the dense and empty cases never reach this branch
+      final DirectMonotonicWriter ordToDocWriter =
+          DirectMonotonicWriter.getInstance(
+              outputMeta, vectorData, count, directMonotonicBlockShift);
+      DocIdSetIterator iterator = docsWithField.iterator();
+      for (int doc = iterator.nextDoc();
+          doc != DocIdSetIterator.NO_MORE_DOCS;
+          doc = iterator.nextDoc()) {
+        ordToDocWriter.add(doc);
+      }
+      ordToDocWriter.finish();
+      outputMeta.writeLong(vectorData.getFilePointer() - start);
+    }
+  }
+
+  /**
+   * Reads in the necessary fields stored in the outputMeta to configure {@link
+   * DirectMonotonicReader} and {@link IndexedDISI}.
+   *
+   * <p>The ordToDoc fields (addresses offset, block shift, {@link DirectMonotonicReader.Meta} and
+   * addresses length) are only read when the stored docsWithFieldOffset is greater than -1, i.e.
+   * in the sparse case. Otherwise they are left at 0 / {@code null}.
+   *
+   * @param inputMeta the inputMeta, previously written to via {@link #writeStoredMeta(int,
+   *     IndexOutput, IndexOutput, int, int, DocsWithFieldSet)}
+   * @param size The number of vectors
+   * @return the configuration required to read sparse vectors
+   * @throws IOException thrown when reading data fails
+   */
+  public static OrdToDocDISIReaderConfiguration fromStoredMeta(IndexInput inputMeta, int size)
+      throws IOException {
+    long docsWithFieldOffset = inputMeta.readLong();
+    long docsWithFieldLength = inputMeta.readLong();
+    short jumpTableEntryCount = inputMeta.readShort();
+    byte denseRankPower = inputMeta.readByte();
+    long addressesOffset = 0;
+    int blockShift = 0;
+    DirectMonotonicReader.Meta meta = null;
+    long addressesLength = 0;
+    if (docsWithFieldOffset > -1) {
+      addressesOffset = inputMeta.readLong();
+      blockShift = inputMeta.readVInt();
+      meta = DirectMonotonicReader.loadMeta(inputMeta, size, blockShift);
+      addressesLength = inputMeta.readLong();
+    }
+    return new OrdToDocDISIReaderConfiguration(
+        size,
+        jumpTableEntryCount,
+        addressesOffset,
+        addressesLength,
+        docsWithFieldOffset,
+        docsWithFieldLength,
+        denseRankPower,
+        meta);
+  }
+
+  final int size;
+  // the following four variables are used to read docIds encoded by IndexedDISI
+  // special values of docsWithFieldOffset are -1 and -2
+  // -1 : dense
+  // -2 : empty
+  // other: sparse
+  final short jumpTableEntryCount;
+  final long docsWithFieldOffset, docsWithFieldLength;
+  final byte denseRankPower;
+
+  // the following three variables are used to read ordToDoc encoded by DirectMonotonicWriter
+  // note that only the sparse case needs to store ordToDoc
+  final long addressesOffset, addressesLength;
+  final DirectMonotonicReader.Meta meta;
+
+  OrdToDocDISIReaderConfiguration(
+      int size,
+      short jumpTableEntryCount,
+      long addressesOffset,
+      long addressesLength,
+      long docsWithFieldOffset,
+      long docsWithFieldLength,
+      byte denseRankPower,
+      DirectMonotonicReader.Meta meta) {
+    this.size = size;
+    this.jumpTableEntryCount = jumpTableEntryCount;
+    this.addressesOffset = addressesOffset;
+    this.addressesLength = addressesLength;
+    this.docsWithFieldOffset = docsWithFieldOffset;
+    this.docsWithFieldLength = docsWithFieldLength;
+    this.denseRankPower = denseRankPower;
+    this.meta = meta;
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return SHALLOW_SIZE + RamUsageEstimator.sizeOf(meta);
+  }
+}