Replace boolean flags on IOContext with an enum. (#13219)

This replaces the `load`, `randomAccess` and `readOnce` flags with a
`ReadAdvice` enum, whose values are aligned with the allowed values to
(f|m)advise.

Closes #13211
This commit is contained in:
Adrien Grand 2024-03-27 09:13:41 +01:00 committed by GitHub
parent 1f909baca5
commit 8558934501
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 106 additions and 69 deletions

View File

@ -89,6 +89,9 @@ API Changes
* GITHUB#13205: Convert IOContext, MergeInfo, and FlushInfo to record classes. (Uwe Schindler)
* GITHUB#13219: The `readOnce`, `load` and `randomAccess` flags on `IOContext` have
been replaced with a new `ReadAdvice` enum. (Adrien Grand)
New Features
---------------------

View File

@ -172,6 +172,15 @@ access the members using method calls instead of field accesses. Affected classe
- `IOContext`, `MergeInfo`, and `FlushInfo` (GITHUB#13205)
### Boolean flags on IOContext replaced with a new ReadAdvice enum.
The `readOnce`, `load` and `randomAccess` flags on `IOContext` have been replaced with a new
`ReadAdvice` enum.
### IOContext.LOAD renamed to IOContext.PRELOAD
`IOContext#LOAD` has been replaced with `IOContext#PRELOAD`.
## Migration from Lucene 9.0 to Lucene 9.1
### Test framework package migration and module (LUCENE-10301)

View File

@ -27,6 +27,7 @@ import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
@ -53,7 +54,7 @@ public class VariableGapTermsIndexReader extends TermsIndexReaderBase {
state.segmentInfo.name,
state.segmentSuffix,
VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
final IndexInput in = state.directory.openInput(fileName, state.context.toReadOnce());
final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE);
boolean success = false;
try {

View File

@ -74,7 +74,7 @@ public class FSTTermsReader extends FieldsProducer {
state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
this.postingsReader = postingsReader;
this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.LOAD);
this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.PRELOAD);
IndexInput in = this.fstTermsInput;

View File

@ -60,7 +60,7 @@ public class Lucene90PointsReader extends PointsReader {
boolean success = false;
try {
indexIn = readState.directory.openInput(indexFileName, IOContext.LOAD);
indexIn = readState.directory.openInput(indexFileName, IOContext.PRELOAD);
CodecUtil.checkIndexHeader(
indexIn,
Lucene90PointsFormat.INDEX_CODEC_NAME,

View File

@ -143,7 +143,7 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
String indexName =
IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
indexIn = state.directory.openInput(indexName, IOContext.LOAD);
indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);
CodecUtil.checkIndexHeader(
indexIn,
TERMS_INDEX_CODEC_NAME,

View File

@ -65,7 +65,7 @@ final class FieldsIndexReader extends FieldsIndex {
maxPointer = metaIn.readLong();
indexInput =
dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.LOAD);
dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.PRELOAD);
boolean success = false;
try {
CodecUtil.checkIndexHeader(

View File

@ -27,22 +27,10 @@ import java.util.Objects;
* @param context An object of an enumerator Context type
* @param mergeInfo must be given when {@code context == MERGE}
* @param flushInfo must be given when {@code context == FLUSH}
* @param readOnce This flag indicates that the file will be opened, then fully read sequentially
* then closed.
* @param load This flag is used for files that are a small fraction of the total index size and are
* expected to be heavily accessed in random-access fashion. Some {@link Directory}
* implementations may choose to load such files into physical memory (e.g. Java heap) as a way
* to provide stronger guarantees on query latency.
* @param randomAccess This flag indicates that the file will be accessed randomly. If this flag is
* set, then readOnce will be false.
* @param readAdvice Advice regarding the read access pattern
*/
public record IOContext(
Context context,
MergeInfo mergeInfo,
FlushInfo flushInfo,
boolean readOnce,
boolean load,
boolean randomAccess) {
Context context, MergeInfo mergeInfo, FlushInfo flushInfo, ReadAdvice readAdvice) {
/**
* Context is an enumerator which specifies the context in which the Directory is being used.
@ -54,58 +42,50 @@ public record IOContext(
DEFAULT
};
public static final IOContext DEFAULT = new IOContext(Context.DEFAULT);
public static final IOContext DEFAULT =
new IOContext(Context.DEFAULT, null, null, ReadAdvice.NORMAL);
public static final IOContext READONCE = new IOContext(true, false, false);
public static final IOContext READONCE = new IOContext(ReadAdvice.SEQUENTIAL);
public static final IOContext READ = new IOContext(false, false, false);
public static final IOContext READ = new IOContext(ReadAdvice.NORMAL);
public static final IOContext LOAD = new IOContext(false, true, true);
public static final IOContext PRELOAD = new IOContext(ReadAdvice.RANDOM_PRELOAD);
public static final IOContext RANDOM = new IOContext(false, false, true);
public static final IOContext RANDOM = new IOContext(ReadAdvice.RANDOM);
@SuppressWarnings("incomplete-switch")
public IOContext {
Objects.requireNonNull(context, "context must not be null");
Objects.requireNonNull(readAdvice, "readAdvice must not be null");
switch (context) {
case MERGE -> Objects.requireNonNull(
mergeInfo, "mergeInfo must not be null if context is MERGE");
case FLUSH -> Objects.requireNonNull(
flushInfo, "flushInfo must not be null if context is FLUSH");
}
if (load && readOnce) {
throw new IllegalArgumentException("load and readOnce are mutually exclusive");
if (context == Context.MERGE && readAdvice != ReadAdvice.SEQUENTIAL) {
throw new IllegalArgumentException(
"The MERGE context must use the SEQUENTIAL read access advice");
}
if (readOnce && randomAccess) {
throw new IllegalArgumentException("readOnce and randomAccess are mutually exclusive");
}
if (load && randomAccess == false) {
throw new IllegalArgumentException("cannot be load but not randomAccess");
if ((context == Context.FLUSH || context == Context.DEFAULT)
&& readAdvice != ReadAdvice.NORMAL) {
throw new IllegalArgumentException(
"The FLUSH and DEFAULT contexts must use the NORMAL read access advice");
}
}
private IOContext(boolean readOnce, boolean load, boolean randomAccess) {
this(Context.READ, null, null, readOnce, load, randomAccess);
}
private IOContext(Context context) {
this(context, null, null, false, false, false);
private IOContext(ReadAdvice accessAdvice) {
this(Context.READ, null, null, accessAdvice);
}
/** Creates an IOContext for flushing. */
public IOContext(FlushInfo flushInfo) {
this(Context.FLUSH, null, flushInfo, false, false, false);
this(Context.FLUSH, null, flushInfo, ReadAdvice.NORMAL);
}
/** Creates an IOContext for merging. */
public IOContext(MergeInfo mergeInfo) {
this(Context.MERGE, mergeInfo, null, false, false, false);
}
/**
* Return a copy of this IOContext with {@link #readOnce} set to {@code true}. The {@link #load}
* flag is set to {@code false}.
*/
public IOContext toReadOnce() {
return new IOContext(context, mergeInfo, flushInfo, true, false, randomAccess);
// Merges read input segments sequentially.
this(Context.MERGE, mergeInfo, null, ReadAdvice.SEQUENTIAL);
}
}

View File

@ -85,10 +85,10 @@ public class MMapDirectory extends FSDirectory {
/**
* Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
* opening them if they use the {@link IOContext#LOAD} I/O context.
* opening them if they use the {@link IOContext#PRELOAD} I/O context.
*/
public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
(filename, context) -> context.load();
(filename, context) -> context.readAdvice() == ReadAdvice.RANDOM_PRELOAD;
private BiPredicate<String, IOContext> preload = NO_FILES;

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.store;
/**
* Advice regarding the read access pattern.
*
* <p>An advice is carried by every {@link IOContext} through its {@code readAdvice} component and
* replaces the former {@code readOnce}, {@code load} and {@code randomAccess} boolean flags. The
* values are aligned with the advice values accepted by the {@code fadvise}/{@code madvise} family
* of system calls.
*
* <p>{@link IOContext} restricts which advice may be combined with which context: merge contexts
* must use {@link #SEQUENTIAL}, while flush and default contexts must use {@link #NORMAL}.
*/
public enum ReadAdvice {
/**
* Normal behavior. Data is expected to be read mostly sequentially. The system is expected to
* cache the hottest pages.
*
* <p>This is the advice used by {@link IOContext#DEFAULT} and {@link IOContext#READ}, and the only
* advice accepted for flush and default contexts.
*/
NORMAL,
/**
* Data is expected to be read in a random-access fashion, either by {@link IndexInput#seek(long)
* seeking} often and reading relatively short sequences of bytes at once, or by reading data
* through the {@link RandomAccessInput} abstraction in random order.
*
* <p>This is the advice used by {@link IOContext#RANDOM}.
*/
RANDOM,
/**
* Data is expected to be read sequentially with very little seeking at most. The system may read
* ahead aggressively and free pages soon after they are accessed.
*
* <p>This is the advice used by {@link IOContext#READONCE}, and the only advice accepted for
* merge contexts.
*/
SEQUENTIAL,
/**
* Data is treated as random-access memory in practice. {@link Directory} implementations may
* explicitly load the content of the file in memory, or provide hints to the system so that it
* loads the content of the file into the page cache at open time. This should only be used on
* very small files that can be expected to fit in RAM with very high confidence.
*
* <p>This is the advice used by {@link IOContext#PRELOAD}; it is also the advice that
* {@link MMapDirectory#BASED_ON_LOAD_IO_CONTEXT} checks for when deciding whether to preload a
* file.
*/
RANDOM_PRELOAD
}

View File

@ -52,7 +52,14 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
MemorySegmentIndexInput.newInstance(
resourceDescription,
arena,
map(arena, resourceDescription, fc, context, chunkSizePower, preload, fileSize),
map(
arena,
resourceDescription,
fc,
context.readAdvice(),
chunkSizePower,
preload,
fileSize),
fileSize,
chunkSizePower);
success = true;
@ -78,7 +85,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
Arena arena,
String resourceDescription,
FileChannel fc,
IOContext context,
ReadAdvice readAdvice,
int chunkSizePower,
boolean preload,
long length)
@ -108,7 +115,7 @@ final class MemorySegmentIndexInputProvider implements MMapDirectory.MMapIndexIn
if (preload) {
segment.load();
} else if (nativeAccess.isPresent() && chunkSizePower >= 21) {
nativeAccess.get().madvise(segment, context);
nativeAccess.get().madvise(segment, readAdvice);
}
segments[segNr] = segment;
startOffset += segSize;

View File

@ -25,7 +25,7 @@ import org.apache.lucene.util.Constants;
abstract class NativeAccess {
/** Invoke the {@code madvise} call for the given {@link MemorySegment}. */
public abstract void madvise(MemorySegment segment, IOContext context) throws IOException;
public abstract void madvise(MemorySegment segment, ReadAdvice readAdvice) throws IOException;
/**
* Return the NativeAccess instance for this platform. At the moment we only support Linux and MacOS

View File

@ -26,7 +26,6 @@ import java.lang.invoke.MethodHandle;
import java.util.Locale;
import java.util.Optional;
import java.util.logging.Logger;
import org.apache.lucene.store.IOContext.Context;
@SuppressWarnings("preview")
final class PosixNativeAccess extends NativeAccess {
@ -110,12 +109,12 @@ final class PosixNativeAccess extends NativeAccess {
}
@Override
public void madvise(MemorySegment segment, IOContext context) throws IOException {
public void madvise(MemorySegment segment, ReadAdvice readAdvice) throws IOException {
// Note: madvise is bypassed if the segment should be preloaded via MemorySegment#load.
if (segment.byteSize() == 0L) {
return; // empty segments should be excluded, because they may have no address at all
}
final Integer advice = mapIOContext(context);
final Integer advice = mapReadAdvice(readAdvice);
if (advice == null) {
return; // do nothing
}
@ -136,18 +135,12 @@ final class PosixNativeAccess extends NativeAccess {
}
}
private Integer mapIOContext(IOContext ctx) {
// Merging always wins and implies sequential access, because kernel is advised to free pages
// after use:
if (ctx.context() == Context.MERGE) {
return POSIX_MADV_SEQUENTIAL;
}
if (ctx.randomAccess()) {
return POSIX_MADV_RANDOM;
}
if (ctx.readOnce()) {
return POSIX_MADV_SEQUENTIAL;
}
return null;
/**
 * Translates a {@link ReadAdvice} into the matching {@code POSIX_MADV_*} constant.
 *
 * @param readAdvice the advice attached to the file being mapped
 * @return the advice constant to hand to {@code madvise}, or {@code null} when no {@code madvise}
 *     call should be issued for this advice (the caller treats {@code null} as "do nothing")
 */
private Integer mapReadAdvice(ReadAdvice readAdvice) {
  return switch (readAdvice) {
    // Advices that have a direct POSIX counterpart.
    case RANDOM -> POSIX_MADV_RANDOM;
    case SEQUENTIAL -> POSIX_MADV_SEQUENTIAL;
    // No explicit hint is passed for these two advices.
    // NOTE(review): for RANDOM_PRELOAD this presumably relies on the segment being preloaded
    // instead of advised - confirm against the preload predicate configured on the directory.
    case NORMAL, RANDOM_PRELOAD -> null;
  };
}
}