More consistently use a SEQUENTIAL ReadAdvice for merging. (#13229)

Merging `IOContext`s use a `SEQUENTIAL` `ReadAdvice`. However, some file
formats hardcode `IOContext.LOAD` for some of their files, which silently
discards the whole merging context, and in particular its `SEQUENTIAL`
`ReadAdvice`.
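
Concretely, the pattern being fixed looks like this (excerpted from the
`Lucene90BlockTreeTermsReader` hunk below):

```java
// Before: the caller's IOContext is ignored, so a merge that arrives with a
// SEQUENTIAL ReadAdvice silently gets RANDOM_PRELOAD instead.
indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);

// After: the merge context is preserved; only READ contexts have their
// advice overridden to RANDOM_PRELOAD.
indexIn =
    state.directory.openInput(
        indexName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
```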

This PR switches these file formats to
`ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)`, so that merges keep
using a `SEQUENTIAL` `ReadAdvice` while searches use a `RANDOM_PRELOAD`
`ReadAdvice`.

This is not a huge deal for `RANDOM_PRELOAD`, which is only used for very small
files. However, this change becomes more relevant for the new `RANDOM`
`ReadAdvice`, as we would like merges to keep using a `SEQUENTIAL` `ReadAdvice`.
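
As a minimal sketch of the resulting behavior (the `ReadAdviceDemo` class name
is illustrative; `IOContext.READ`, `readAdvice()` and `withReadAdvice` are
taken from the `IOContext` hunk below):

```java
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.ReadAdvice;

public class ReadAdviceDemo {
  public static void main(String[] args) {
    // A search-time context starts out with NORMAL advice...
    IOContext search = IOContext.READ;
    // ...and withReadAdvice overrides it, because the context is READ.
    System.out.println(search.withReadAdvice(ReadAdvice.RANDOM_PRELOAD).readAdvice());
    // -> RANDOM_PRELOAD

    // A MERGE context would be returned unchanged, so its SEQUENTIAL advice
    // survives; see the usage sketch after the IOContext hunk below.
  }
}
```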
Adrien Grand 2024-03-29 09:14:14 +01:00 committed by GitHub
parent 878d233bc1
commit a807772d41
10 changed files with 48 additions and 18 deletions

@@ -185,9 +185,10 @@ access the members using method calls instead of field accesses. Affected classes
 The `readOnce`, `load` and `random` flags on `IOContext` have been replaced with a new `ReadAdvice`
 enum.
 
-### IOContext.LOAD renamed to IOContext.PRELOAD
+### IOContext.LOAD removed
 
-`IOContext#LOAD` has been replaced with `IOContext#PRELOAD`.
+`IOContext#LOAD` has been removed, it should be replaced with
+`ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)`.
 
 ## Migration from Lucene 9.0 to Lucene 9.1

@@ -39,8 +39,8 @@ import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -74,7 +74,9 @@ public class FSTTermsReader extends FieldsProducer {
             state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
     this.postingsReader = postingsReader;
-    this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.PRELOAD);
+    this.fstTermsInput =
+        state.directory.openInput(
+            termsFileName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
     IndexInput in = this.fstTermsInput;

@@ -27,8 +27,8 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
@@ -60,7 +60,9 @@ public class Lucene90PointsReader extends PointsReader {
     boolean success = false;
     try {
-      indexIn = readState.directory.openInput(indexFileName, IOContext.PRELOAD);
+      indexIn =
+          readState.directory.openInput(
+              indexFileName, readState.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           Lucene90PointsFormat.INDEX_CODEC_NAME,

@@ -32,8 +32,8 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
 import org.apache.lucene.util.IOUtils;
@@ -143,7 +143,9 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
       String indexName =
           IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-      indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);
+      indexIn =
+          state.directory.openInput(
+              indexName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           TERMS_INDEX_CODEC_NAME,

@@ -28,6 +28,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.packed.DirectMonotonicReader;
 
 final class FieldsIndexReader extends FieldsIndex {
@@ -52,7 +53,8 @@ final class FieldsIndexReader extends FieldsIndex {
       String extension,
       String codecName,
       byte[] id,
-      IndexInput metaIn)
+      IndexInput metaIn,
+      IOContext context)
       throws IOException {
     maxDoc = metaIn.readInt();
     blockShift = metaIn.readInt();
@@ -65,7 +67,9 @@ final class FieldsIndexReader extends FieldsIndex {
     maxPointer = metaIn.readLong();
     indexInput =
-        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.PRELOAD);
+        dir.openInput(
+            IndexFileNames.segmentFileName(name, suffix, extension),
+            context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
     boolean success = false;
     try {
       CodecUtil.checkIndexHeader(

@@ -163,7 +163,14 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsReader
       FieldsIndexReader fieldsIndexReader =
           new FieldsIndexReader(
-              d, si.name, segmentSuffix, INDEX_EXTENSION, INDEX_CODEC_NAME, si.getId(), metaIn);
+              d,
+              si.name,
+              segmentSuffix,
+              INDEX_EXTENSION,
+              INDEX_CODEC_NAME,
+              si.getId(),
+              metaIn,
+              context);
       indexReader = fieldsIndexReader;
       maxPointer = fieldsIndexReader.getMaxPointer();

@@ -169,7 +169,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
               VECTORS_INDEX_EXTENSION,
               VECTORS_INDEX_CODEC_NAME,
               si.getId(),
-              metaIn);
+              metaIn,
+              context);
       this.indexReader = fieldsIndexReader;
       this.maxPointer = fieldsIndexReader.getMaxPointer();

@@ -49,10 +49,6 @@ public record IOContext(
   public static final IOContext READ = new IOContext(ReadAdvice.NORMAL);
-
-  public static final IOContext PRELOAD = new IOContext(ReadAdvice.RANDOM_PRELOAD);
-
-  public static final IOContext RANDOM = new IOContext(ReadAdvice.RANDOM);
 
   @SuppressWarnings("incomplete-switch")
   public IOContext {
     Objects.requireNonNull(context, "context must not be null");
@@ -88,4 +84,18 @@ public record IOContext(
     // Merges read input segments sequentially.
     this(Context.MERGE, mergeInfo, null, ReadAdvice.SEQUENTIAL);
   }
+
+  /**
+   * Return an updated {@link IOContext} that has the provided {@link ReadAdvice} if the {@link
+   * Context} is a {@link Context#READ} context, otherwise return this existing instance. This
+   * helps preserve a {@link ReadAdvice#SEQUENTIAL} advice for merging, which is always the right
+   * choice, while allowing {@link IndexInput}s open for searching to use arbitrary {@link
+   * ReadAdvice}s.
+   */
+  public IOContext withReadAdvice(ReadAdvice advice) {
+    if (context == Context.READ) {
+      return new IOContext(advice);
+    } else {
+      return this;
+    }
+  }
 }
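
A hedged usage sketch of the new method for the merge case; it assumes the
public `IOContext(MergeInfo)` constructor shown above and a
`MergeInfo(totalMaxDoc, estimatedMergeBytes, isExternal, mergeMaxNumSegments)`
shape for `org.apache.lucene.store.MergeInfo`:

```java
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.ReadAdvice;

public class MergeAdviceSketch {
  public static void main(String[] args) {
    // Merge contexts are created with a SEQUENTIAL advice (see the constructor above).
    IOContext merge = new IOContext(new MergeInfo(1_000, 1_024L, false, 1));
    // withReadAdvice is a no-op for non-READ contexts, so SEQUENTIAL is
    // preserved and the very same instance comes back.
    IOContext unchanged = merge.withReadAdvice(ReadAdvice.RANDOM_PRELOAD);
    System.out.println(unchanged.readAdvice()); // -> SEQUENTIAL
    System.out.println(unchanged == merge); // -> true
  }
}
```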

@@ -85,7 +85,7 @@ public class MMapDirectory extends FSDirectory {
   /**
    * Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
-   * opening them if they use the {@link IOContext#PRELOAD} I/O context.
+   * opening them if they use the {@link ReadAdvice#RANDOM_PRELOAD} advice.
    */
   public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
       (filename, context) -> context.readAdvice() == ReadAdvice.RANDOM_PRELOAD;
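
For example, a directory can opt into preloading based on this advice via the
`setPreload(BiPredicate)` hook referenced in the javadoc above (the index path
is hypothetical):

```java
import java.nio.file.Path;
import org.apache.lucene.store.MMapDirectory;

public class PreloadSetup {
  public static void main(String[] args) throws Exception {
    // Hypothetical index location, for illustration only.
    MMapDirectory dir = new MMapDirectory(Path.of("/tmp/index"));
    // Preload mmapped files whose opening context carries RANDOM_PRELOAD advice.
    dir.setPreload(MMapDirectory.BASED_ON_LOAD_IO_CONTEXT);
  }
}
```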

@@ -109,7 +109,8 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
       out.writeBytes(bytes, 0, bytes.length);
     }
-    try (final IndexInput in = dir.openInput("test", IOContext.RANDOM)) {
+    try (final IndexInput in =
+        dir.openInput("test", IOContext.READ.withReadAdvice(ReadAdvice.RANDOM))) {
       final byte[] readBytes = new byte[size];
       in.readBytes(readBytes, 0, readBytes.length);
       assertArrayEquals(bytes, readBytes);