mirror of https://github.com/apache/lucene.git
More consistently use a SEQUENTIAL ReadAdvice for merging. (#13229)
Merging `IOContext`s use a `SEQUENTIAL` `ReadAdvice`. However, some file formats hardcode `IOContext.LOAD` for some of their files, which discards the merging context and, in particular, its `SEQUENTIAL` `ReadAdvice`. This PR switches file formats to `ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)` so that merges keep using a `SEQUENTIAL` `ReadAdvice` while searches use a `RANDOM_PRELOAD` `ReadAdvice`. This is not a huge deal for `RANDOM_PRELOAD`, which is only used for very small files, but the change becomes more relevant for the new `RANDOM` `ReadAdvice`, as we would like merges to keep using a `SEQUENTIAL` `ReadAdvice`.
This commit is contained in:
parent 878d233bc1
commit a807772d41
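The core pattern of the change, as a minimal sketch rather than code from the commit: a file-format reader keeps the caller's `IOContext` and only adjusts its `ReadAdvice`, so a merge context retains `SEQUENTIAL` while a search context gets `RANDOM_PRELOAD`. The `ReadAdviceSketch` class, the `Directory` argument, and the file name are hypothetical placeholders.

```java
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;

final class ReadAdviceSketch {
  // Open a small, randomly accessed file the way the patched readers now do: derive the
  // advice from the caller's context instead of hardcoding a preload context.
  static IndexInput openSmallFile(Directory dir, String fileName, IOContext context)
      throws IOException {
    // For a search-time READ context this yields RANDOM_PRELOAD; for a MERGE context
    // withReadAdvice returns the context unchanged, preserving its SEQUENTIAL advice.
    return dir.openInput(fileName, context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
  }
}
```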
@@ -185,9 +185,10 @@ access the members using method calls instead of field accesses. Affected classe
 The `readOnce`, `load` and `random` flags on `IOContext` have been replaced with a new `ReadAdvice`
 enum.
 
-### IOContext.LOAD renamed to IOContext.PRELOAD
+### IOContext.LOAD removed
 
-`IOContext#LOAD` has been replaced with `IOContext#PRELOAD`.
+`IOContext#LOAD` has been removed; it should be replaced with
+`ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)`.
 
 ## Migration from Lucene 9.0 to Lucene 9.1
 
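For application code that passed the removed constant directly, the migration note above amounts to deriving an equivalent context from `IOContext.READ`. A hedged sketch, where the directory, file name and `PreloadMigration` class are placeholders:

```java
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;

final class PreloadMigration {
  static IndexInput openPreloaded(Directory dir, String fileName) throws IOException {
    // Formerly: dir.openInput(fileName, IOContext.LOAD) (later IOContext.PRELOAD).
    IOContext preload = IOContext.READ.withReadAdvice(ReadAdvice.RANDOM_PRELOAD);
    return dir.openInput(fileName, preload);
  }
}
```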
@@ -39,8 +39,8 @@ import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -74,7 +74,9 @@ public class FSTTermsReader extends FieldsProducer {
             state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
 
     this.postingsReader = postingsReader;
-    this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.PRELOAD);
+    this.fstTermsInput =
+        state.directory.openInput(
+            termsFileName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
 
     IndexInput in = this.fstTermsInput;
 
@@ -27,8 +27,8 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
 
@@ -60,7 +60,9 @@ public class Lucene90PointsReader extends PointsReader {
 
     boolean success = false;
     try {
-      indexIn = readState.directory.openInput(indexFileName, IOContext.PRELOAD);
+      indexIn =
+          readState.directory.openInput(
+              indexFileName, readState.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           Lucene90PointsFormat.INDEX_CODEC_NAME,
@@ -32,8 +32,8 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
 import org.apache.lucene.util.IOUtils;
@@ -143,7 +143,9 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
 
       String indexName =
           IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-      indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);
+      indexIn =
+          state.directory.openInput(
+              indexName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           TERMS_INDEX_CODEC_NAME,
@@ -28,6 +28,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.packed.DirectMonotonicReader;
 
 final class FieldsIndexReader extends FieldsIndex {
@@ -52,7 +53,8 @@ final class FieldsIndexReader extends FieldsIndex {
       String extension,
       String codecName,
       byte[] id,
-      IndexInput metaIn)
+      IndexInput metaIn,
+      IOContext context)
       throws IOException {
     maxDoc = metaIn.readInt();
     blockShift = metaIn.readInt();
@@ -65,7 +67,9 @@ final class FieldsIndexReader extends FieldsIndex {
     maxPointer = metaIn.readLong();
 
     indexInput =
-        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.PRELOAD);
+        dir.openInput(
+            IndexFileNames.segmentFileName(name, suffix, extension),
+            context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
     boolean success = false;
     try {
       CodecUtil.checkIndexHeader(
@@ -163,7 +163,14 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsRea
 
       FieldsIndexReader fieldsIndexReader =
           new FieldsIndexReader(
-              d, si.name, segmentSuffix, INDEX_EXTENSION, INDEX_CODEC_NAME, si.getId(), metaIn);
+              d,
+              si.name,
+              segmentSuffix,
+              INDEX_EXTENSION,
+              INDEX_CODEC_NAME,
+              si.getId(),
+              metaIn,
+              context);
       indexReader = fieldsIndexReader;
       maxPointer = fieldsIndexReader.getMaxPointer();
 
@@ -169,7 +169,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
               VECTORS_INDEX_EXTENSION,
               VECTORS_INDEX_CODEC_NAME,
               si.getId(),
-              metaIn);
+              metaIn,
+              context);
 
       this.indexReader = fieldsIndexReader;
       this.maxPointer = fieldsIndexReader.getMaxPointer();
@@ -49,10 +49,6 @@ public record IOContext(
 
   public static final IOContext READ = new IOContext(ReadAdvice.NORMAL);
 
-  public static final IOContext PRELOAD = new IOContext(ReadAdvice.RANDOM_PRELOAD);
-
-  public static final IOContext RANDOM = new IOContext(ReadAdvice.RANDOM);
-
   @SuppressWarnings("incomplete-switch")
   public IOContext {
     Objects.requireNonNull(context, "context must not be null");
@@ -88,4 +84,18 @@ public record IOContext(
     // Merges read input segments sequentially.
     this(Context.MERGE, mergeInfo, null, ReadAdvice.SEQUENTIAL);
   }
+
+  /**
+   * Return an updated {@link IOContext} that has the provided {@link ReadAdvice} if the {@link
+   * Context} is a {@link Context#READ} context, otherwise return this existing instance. This helps
+   * preserve a {@link ReadAdvice#SEQUENTIAL} advice for merging, which is always the right choice,
+   * while allowing {@link IndexInput}s open for searching to use arbitrary {@link ReadAdvice}s.
+   */
+  public IOContext withReadAdvice(ReadAdvice advice) {
+    if (context == Context.READ) {
+      return new IOContext(advice);
+    } else {
+      return this;
+    }
+  }
 }
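A small illustration of the new method's behavior, as a hedged sketch rather than a test from the commit: it assumes the `IOContext(MergeInfo)` constructor delegated to above is public and that `MergeInfo` still takes `(totalMaxDoc, estimatedMergeBytes, isExternal, mergeMaxNumSegments)`.

```java
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.ReadAdvice;

final class WithReadAdviceDemo {
  public static void main(String[] args) {
    // A search-time READ context adopts the requested advice.
    IOContext search = IOContext.READ.withReadAdvice(ReadAdvice.RANDOM_PRELOAD);
    System.out.println(search.readAdvice()); // RANDOM_PRELOAD

    // A MERGE context is returned unchanged, so merges keep reading sequentially.
    IOContext merge = new IOContext(new MergeInfo(1000, 1L << 20, false, 1));
    System.out.println(merge.withReadAdvice(ReadAdvice.RANDOM_PRELOAD).readAdvice()); // SEQUENTIAL
  }
}
```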
@@ -85,7 +85,7 @@ public class MMapDirectory extends FSDirectory {
 
   /**
    * Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
-   * opening them if they use the {@link IOContext#PRELOAD} I/O context.
+   * opening them if they use the {@link ReadAdvice#RANDOM_PRELOAD} advice.
    */
   public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
       (filename, context) -> context.readAdvice() == ReadAdvice.RANDOM_PRELOAD;
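Usage stays the same from an application's point of view; only the condition behind the predicate changed, since it now keys on the `RANDOM_PRELOAD` advice instead of the removed `IOContext.PRELOAD` constant. A short sketch (the index path is a placeholder):

```java
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.store.MMapDirectory;

final class PreloadConfigExample {
  static MMapDirectory openWithPreload() throws IOException {
    MMapDirectory dir = new MMapDirectory(Paths.get("/path/to/index")); // placeholder path
    // Preload files whose opening IOContext carries the RANDOM_PRELOAD advice.
    dir.setPreload(MMapDirectory.BASED_ON_LOAD_IO_CONTEXT);
    return dir;
  }
}
```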
@@ -109,7 +109,8 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
       out.writeBytes(bytes, 0, bytes.length);
     }
 
-    try (final IndexInput in = dir.openInput("test", IOContext.RANDOM)) {
+    try (final IndexInput in =
+        dir.openInput("test", IOContext.READ.withReadAdvice(ReadAdvice.RANDOM))) {
       final byte[] readBytes = new byte[size];
       in.readBytes(readBytes, 0, readBytes.length);
       assertArrayEquals(bytes, readBytes);