Replace boolean flags on IOContext with an enum. (#13219)

This replaces the `load`, `randomAccess` and `readOnce` flags with a `ReadAdvice` enum, whose values are aligned with the allowed values to (f|m)advise. Closes #13211
2025-03-06 16:29:30 +00:00 · 2024-03-27 09:13:41 +01:00 · 2024-03-27 09:13:41 +01:00 · 8558934501
commit 8558934501
parent 1f909baca5
13 changed files with 106 additions and 69 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -89,6 +89,9 @@ API Changes

 * GITHUB#13205: Convert IOContext, MergeInfo, and FlushInfo to record classes.  (Uwe Schindler)

+* GITHUB#13219: The `readOnce`, `load` and `randomAccess` flags on `IOContext` have
+  been replaced with a new `ReadAdvice` enum. (Adrien Grand)
+
 New Features
 ---------------------

--- a/lucene/MIGRATE.md
+++ b/lucene/MIGRATE.md
@ -172,6 +172,15 @@ access the members using method calls instead of field accesses. Affected classe

 - `IOContext`, `MergeInfo`, and `FlushInfo` (GITHUB#13205)

+### Boolean flags on IOContext replaced with a new ReadAdvice enum.
+
+The `readOnce`, `load` and `randomAccess` flags on `IOContext` have been replaced with a new
+`ReadAdvice` enum.
+
+### IOContext.LOAD renamed to IOContext.PRELOAD
+
+`IOContext#LOAD` has been replaced with `IOContext#PRELOAD`.
+
 ## Migration from Lucene 9.0 to Lucene 9.1

 ### Test framework package migration and module (LUCENE-10301)
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/VariableGapTermsIndexReader.java
@ -27,6 +27,7 @@ import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.SegmentReadState;
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Accountables;
@ -53,7 +54,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
            state.segmentInfo.name,
            state.segmentSuffix,
            VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
-    final IndexInput in = state.directory.openInput(fileName, state.context.toReadOnce());
+    final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE);
    boolean success = false;

    try {
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
@ -74,7 +74,7 @@ public class FSTTermsReader extends FieldsProducer {
            state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

    this.postingsReader = postingsReader;
-    this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.LOAD);
+    this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.PRELOAD);

    IndexInput in = this.fstTermsInput;

--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java
@ -60,7 +60,7 @@ public class Lucene90PointsReader extends PointsReader {

    boolean success = false;
    try {
-      indexIn = readState.directory.openInput(indexFileName, IOContext.LOAD);
+      indexIn = readState.directory.openInput(indexFileName, IOContext.PRELOAD);
      CodecUtil.checkIndexHeader(
          indexIn,
          Lucene90PointsFormat.INDEX_CODEC_NAME,
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/blocktree/Lucene90BlockTreeTermsReader.java
@ -143,7 +143,7 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {

      String indexName =
          IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-      indexIn = state.directory.openInput(indexName, IOContext.LOAD);
+      indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);
      CodecUtil.checkIndexHeader(
          indexIn,
          TERMS_INDEX_CODEC_NAME,
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/FieldsIndexReader.java
@ -65,7 +65,7 @@ final class FieldsIndexReader extends FieldsIndex {
    maxPointer = metaIn.readLong();

    indexInput =
-        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.LOAD);
+        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.PRELOAD);
    boolean success = false;
    try {
      CodecUtil.checkIndexHeader(
--- a/lucene/core/src/java/org/apache/lucene/store/IOContext.java
+++ b/lucene/core/src/java/org/apache/lucene/store/IOContext.java
@ -27,22 +27,10 @@ import java.util.Objects;
 * @param context An object of a enumerator Context type
 * @param mergeInfo must be given when {@code context == MERGE}
 * @param flushInfo must be given when {@code context == FLUSH}
- * @param readOnce This flag indicates that the file will be opened, then fully read sequentially
- *     then closed.
- * @param load This flag is used for files that are a small fraction of the total index size and are
- *     expected to be heavily accessed in random-access fashion. Some {@link Directory}
- *     implementations may choose to load such files into physical memory (e.g. Java heap) as a way
- *     to provide stronger guarantees on query latency.
- * @param randomAccess This flag indicates that the file will be accessed randomly. If this flag is
- *     set, then readOnce will be false.
+ * @param readAdvice Advice regarding the read access pattern
 */
 public record IOContext(
-    Context context,
-    MergeInfo mergeInfo,
-    FlushInfo flushInfo,
-    boolean readOnce,
-    boolean load,
-    boolean randomAccess) {
+    Context context, MergeInfo mergeInfo, FlushInfo flushInfo, ReadAdvice readAdvice) {

  /**
   * Context is a enumerator which specifies the context in which the Directory is being used for.
@ -54,58 +42,50 @@ public record IOContext(
    DEFAULT
  };

-  public static final IOContext DEFAULT = new IOContext(Context.DEFAULT);
+  public static final IOContext DEFAULT =
+      new IOContext(Context.DEFAULT, null, null, ReadAdvice.NORMAL);

-  public static final IOContext READONCE = new IOContext(true, false, false);
+  public static final IOContext READONCE = new IOContext(ReadAdvice.SEQUENTIAL);

-  public static final IOContext READ = new IOContext(false, false, false);
+  public static final IOContext READ = new IOContext(ReadAdvice.NORMAL);

-  public static final IOContext LOAD = new IOContext(false, true, true);
+  public static final IOContext PRELOAD = new IOContext(ReadAdvice.RANDOM_PRELOAD);

-  public static final IOContext RANDOM = new IOContext(false, false, true);
+  public static final IOContext RANDOM = new IOContext(ReadAdvice.RANDOM);

  @SuppressWarnings("incomplete-switch")
  public IOContext {
+    Objects.requireNonNull(context, "context must not be null");
+    Objects.requireNonNull(readAdvice, "readAdvice must not be null");
    switch (context) {
      case MERGE -> Objects.requireNonNull(
          mergeInfo, "mergeInfo must not be null if context is MERGE");
      case FLUSH -> Objects.requireNonNull(
          flushInfo, "flushInfo must not be null if context is FLUSH");
    }
-    if (load && readOnce) {
-      throw new IllegalArgumentException("load and readOnce are mutually exclusive");
+    if (context == Context.MERGE && readAdvice != ReadAdvice.SEQUENTIAL) {
+      throw new IllegalArgumentException(
+          "The MERGE context must use the SEQUENTIAL read access advice");
    }
-    if (readOnce && randomAccess) {
-      throw new IllegalArgumentException("readOnce and randomAccess are mutually exclusive");
-    }
-    if (load && randomAccess == false) {
-      throw new IllegalArgumentException("cannot be load but not randomAccess");
+    if ((context == Context.FLUSH || context == Context.DEFAULT)
+        && readAdvice != ReadAdvice.NORMAL) {
+      throw new IllegalArgumentException(
+          "The FLUSH and DEFAULT contexts must use the NORMAL read access advice");
    }
  }

-  private IOContext(boolean readOnce, boolean load, boolean randomAccess) {
-    this(Context.READ, null, null, readOnce, load, randomAccess);
-  }
-
-  private IOContext(Context context) {
-    this(context, null, null, false, false, false);
+  private IOContext(ReadAdvice accessAdvice) {
+    this(Context.READ, null, null, accessAdvice);
  }

  /** Creates an IOContext for flushing. */
  public IOContext(FlushInfo flushInfo) {
-    this(Context.FLUSH, null, flushInfo, false, false, false);
+    this(Context.FLUSH, null, flushInfo, ReadAdvice.NORMAL);
  }

  /** Creates an IOContext for merging. */
  public IOContext(MergeInfo mergeInfo) {
-    this(Context.MERGE, mergeInfo, null, false, false, false);
-  }
-
-  /**
-   * Return a copy of this IOContext with {@link #readOnce} set to {@code true}. The {@link #load}
-   * flag is set to {@code false}.
-   */
-  public IOContext toReadOnce() {
-    return new IOContext(context, mergeInfo, flushInfo, true, false, randomAccess);
+    // Merges read input segments sequentially.
+    this(Context.MERGE, mergeInfo, null, ReadAdvice.SEQUENTIAL);
  }
 }
--- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
@ -85,10 +85,10 @@ public class MMapDirectory extends FSDirectory {

  /**
   * Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
-   * opening them if they use the {@link IOContext#LOAD} I/O context.
+   * opening them if they use the {@link IOContext#PRELOAD} I/O context.
   */
  public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
-      (filename, context) -> context.load();
+      (filename, context) -> context.readAdvice() == ReadAdvice.RANDOM_PRELOAD;

  private BiPredicate<String, IOContext> preload = NO_FILES;

--- a/lucene/core/src/java/org/apache/lucene/store/ReadAdvice.java
+++ b/lucene/core/src/java/org/apache/lucene/store/ReadAdvice.java
@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.store;
+
+/** Advice regarding the read access pattern. */
+public enum ReadAdvice {
+  /**
+   * Normal behavior. Data is expected to be read mostly sequentially. The system is expected to
+   * cache the hottest pages.
+   */
+  NORMAL,
+  /**
+   * Data is expected to be read in a random-access fashion, either by {@link IndexInput#seek(long)
+   * seeking} often and reading relatively short sequences of bytes at once, or by reading data
+   * through the {@link RandomAccessInput} abstraction in random order.
+   */
+  RANDOM,
+  /**
+   * Data is expected to be read sequentially with very little seeking at most. The system may read
+   * ahead aggressively and free pages soon after they are accessed.
+   */
+  SEQUENTIAL,
+  /**
+   * Data is treated as random-access memory in practice. {@link Directory} implementations may
+   * explicitly load the content of the file in memory, or provide hints to the system so that it
+   * loads the content of the file into the page cache at open time. This should only be used on
+   * very small files that can be expected to fit in RAM with very high confidence.
+   */
+  RANDOM_PRELOAD
+}
--- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInputProvider.java
@ -52,7 +52,14 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
          MemorySegmentIndexInput.newInstance(
              resourceDescription,
              arena,
-              map(arena, resourceDescription, fc, context, chunkSizePower, preload, fileSize),
+              map(
+                  arena,
+                  resourceDescription,
+                  fc,
+                  context.readAdvice(),
+                  chunkSizePower,
+                  preload,
+                  fileSize),
              fileSize,
              chunkSizePower);
      success = true;
@ -78,7 +85,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
      Arena arena,
      String resourceDescription,
      FileChannel fc,
-      IOContext context,
+      ReadAdvice readAdvice,
      int chunkSizePower,
      boolean preload,
      long length)
@ -108,7 +115,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
      if (preload) {
        segment.load();
      } else if (nativeAccess.isPresent() && chunkSizePower >= 21) {
-        nativeAccess.get().madvise(segment, context);
+        nativeAccess.get().madvise(segment, readAdvice);
      }
      segments[segNr] = segment;
      startOffset += segSize;
--- a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java
@ -25,7 +25,7 @@ import org.apache.lucene.util.Constants;
 abstract class NativeAccess {

  /** Invoke the {@code madvise} call for the given {@link MemorySegment}. */
-  public abstract void madvise(MemorySegment segment, IOContext context) throws IOException;
+  public abstract void madvise(MemorySegment segment, ReadAdvice readAdvice) throws IOException;

  /**
   * Return the NativeAccess instance for this platform. At moment we only support Linux and MacOS
--- a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java
+++ b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java
@ -26,7 +26,6 @@ import java.lang.invoke.MethodHandle;
 import java.util.Locale;
 import java.util.Optional;
 import java.util.logging.Logger;
-import org.apache.lucene.store.IOContext.Context;

@SuppressWarnings("preview")
 final class PosixNativeAccess extends NativeAccess {
@ -110,12 +109,12 @@ final class PosixNativeAccess extends NativeAccess {
  }

  @Override
-  public void madvise(MemorySegment segment, IOContext context) throws IOException {
+  public void madvise(MemorySegment segment, ReadAdvice readAdvice) throws IOException {
    // Note: madvise is bypassed if the segment should be preloaded via MemorySegment#load.
    if (segment.byteSize() == 0L) {
      return; // empty segments should be excluded, because they may have no address at all
    }
-    final Integer advice = mapIOContext(context);
+    final Integer advice = mapReadAdvice(readAdvice);
    if (advice == null) {
      return; // do nothing
    }
@ -136,18 +135,12 @@ final class PosixNativeAccess extends NativeAccess {
    }
  }

-  private Integer mapIOContext(IOContext ctx) {
-    // Merging always wins and implies sequential access, because kernel is advised to free pages
-    // after use:
-    if (ctx.context() == Context.MERGE) {
-      return POSIX_MADV_SEQUENTIAL;
-    }
-    if (ctx.randomAccess()) {
-      return POSIX_MADV_RANDOM;
-    }
-    if (ctx.readOnce()) {
-      return POSIX_MADV_SEQUENTIAL;
-    }
-    return null;
+  private Integer mapReadAdvice(ReadAdvice readAdvice) {
+    return switch (readAdvice) {
+      case NORMAL -> null;
+      case RANDOM -> POSIX_MADV_RANDOM;
+      case SEQUENTIAL -> POSIX_MADV_SEQUENTIAL;
+      case RANDOM_PRELOAD -> null;
+    };
  }
 }