mirror of https://github.com/apache/lucene.git
More consistently use a SEQUENTIAL ReadAdvice for merging. (#13229)
Merging `IOContext`s use a `SEQUENTIAL` `ReadAdvice`. However, some file formats hardcode `IOContext.LOAD` for some of their files, which discards the whole merging context, in particular its `SEQUENTIAL` `ReadAdvice`. This PR switches those file formats to `ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)`, so that merges use a `SEQUENTIAL` `ReadAdvice` while searches use a `RANDOM_PRELOAD` `ReadAdvice`. This is not a huge deal for `RANDOM_PRELOAD`, which is only used for very small files, but it becomes more relevant for the new `RANDOM` `ReadAdvice`, since we would like merges to keep using a `SEQUENTIAL` `ReadAdvice`.
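To illustrate the call-site pattern applied in the hunks below, here is a minimal sketch of a file-format reader opening its terms index. `FooFormatSketch` and its file extension are hypothetical; the `SegmentReadState` fields and the `withReadAdvice` call follow the code changed in this commit.

```java
import java.io.IOException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;

// Hypothetical reader: derives the read advice from the caller's context
// instead of hardcoding IOContext.LOAD/PRELOAD.
final class FooFormatSketch {
  static final String TERMS_INDEX_EXTENSION = "foo"; // placeholder extension

  static IndexInput openTermsIndex(SegmentReadState state) throws IOException {
    String fileName =
        IndexFileNames.segmentFileName(
            state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION);
    // Searches (Context.READ) are upgraded to RANDOM_PRELOAD; merge contexts keep SEQUENTIAL.
    return state.directory.openInput(
        fileName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
  }
}
```

The same pattern appears in each of the readers patched below.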
parent 878d233bc1
commit a807772d41
MIGRATE.md

```diff
@@ -185,9 +185,10 @@ access the members using method calls instead of field accesses. Affected classes
 The `readOnce`, `load` and `random` flags on `IOContext` have been replaced with a new `ReadAdvice`
 enum.

-### IOContext.LOAD renamed to IOContext.PRELOAD
+### IOContext.LOAD removed

-`IOContext#LOAD` has been replaced with `IOContext#PRELOAD`.
+`IOContext#LOAD` has been removed, it should be replaced with
+`ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)`.

 ## Migration from Lucene 9.0 to Lucene 9.1
```
FSTTermsReader.java

```diff
@@ -39,8 +39,8 @@ import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -74,7 +74,9 @@ public class FSTTermsReader extends FieldsProducer {
             state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

     this.postingsReader = postingsReader;
-    this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.PRELOAD);
+    this.fstTermsInput =
+        state.directory.openInput(
+            termsFileName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));

     IndexInput in = this.fstTermsInput;

```
Lucene90PointsReader.java

```diff
@@ -27,8 +27,8 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;

@@ -60,7 +60,9 @@ public class Lucene90PointsReader extends PointsReader {

     boolean success = false;
     try {
-      indexIn = readState.directory.openInput(indexFileName, IOContext.PRELOAD);
+      indexIn =
+          readState.directory.openInput(
+              indexFileName, readState.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           Lucene90PointsFormat.INDEX_CODEC_NAME,
```
Lucene90BlockTreeTermsReader.java

```diff
@@ -32,8 +32,8 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
 import org.apache.lucene.util.IOUtils;
@@ -143,7 +143,9 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {

       String indexName =
           IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-      indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);
+      indexIn =
+          state.directory.openInput(
+              indexName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           TERMS_INDEX_CODEC_NAME,
```
FieldsIndexReader.java

```diff
@@ -28,6 +28,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.packed.DirectMonotonicReader;

 final class FieldsIndexReader extends FieldsIndex {
@@ -52,7 +53,8 @@ final class FieldsIndexReader extends FieldsIndex {
       String extension,
       String codecName,
       byte[] id,
-      IndexInput metaIn)
+      IndexInput metaIn,
+      IOContext context)
       throws IOException {
     maxDoc = metaIn.readInt();
     blockShift = metaIn.readInt();
@@ -65,7 +67,9 @@ final class FieldsIndexReader extends FieldsIndex {
     maxPointer = metaIn.readLong();

     indexInput =
-        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.PRELOAD);
+        dir.openInput(
+            IndexFileNames.segmentFileName(name, suffix, extension),
+            context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
     boolean success = false;
     try {
       CodecUtil.checkIndexHeader(
```
Lucene90CompressingStoredFieldsReader.java

```diff
@@ -163,7 +163,14 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsReader

       FieldsIndexReader fieldsIndexReader =
           new FieldsIndexReader(
-              d, si.name, segmentSuffix, INDEX_EXTENSION, INDEX_CODEC_NAME, si.getId(), metaIn);
+              d,
+              si.name,
+              segmentSuffix,
+              INDEX_EXTENSION,
+              INDEX_CODEC_NAME,
+              si.getId(),
+              metaIn,
+              context);
       indexReader = fieldsIndexReader;
       maxPointer = fieldsIndexReader.getMaxPointer();

```
Lucene90CompressingTermVectorsReader.java

```diff
@@ -169,7 +169,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReader
               VECTORS_INDEX_EXTENSION,
               VECTORS_INDEX_CODEC_NAME,
               si.getId(),
-              metaIn);
+              metaIn,
+              context);

       this.indexReader = fieldsIndexReader;
       this.maxPointer = fieldsIndexReader.getMaxPointer();
```
IOContext.java

```diff
@@ -49,10 +49,6 @@ public record IOContext(

   public static final IOContext READ = new IOContext(ReadAdvice.NORMAL);

-  public static final IOContext PRELOAD = new IOContext(ReadAdvice.RANDOM_PRELOAD);
-
-  public static final IOContext RANDOM = new IOContext(ReadAdvice.RANDOM);
-
   @SuppressWarnings("incomplete-switch")
   public IOContext {
     Objects.requireNonNull(context, "context must not be null");
@@ -88,4 +84,18 @@ public record IOContext(
     // Merges read input segments sequentially.
     this(Context.MERGE, mergeInfo, null, ReadAdvice.SEQUENTIAL);
   }
+
+  /**
+   * Return an updated {@link IOContext} that has the provided {@link ReadAdvice} if the {@link
+   * Context} is a {@link Context#READ} context, otherwise return this existing instance. This helps
+   * preserve a {@link ReadAdvice#SEQUENTIAL} advice for merging, which is always the right choice,
+   * while allowing {@link IndexInput}s open for searching to use arbitrary {@link ReadAdvice}s.
+   */
+  public IOContext withReadAdvice(ReadAdvice advice) {
+    if (context == Context.READ) {
+      return new IOContext(advice);
+    } else {
+      return this;
+    }
+  }
 }
```
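As a usage sketch of the semantics documented above: only `Context.READ` contexts are rewritten, so a merge context keeps its `SEQUENTIAL` advice. The `IOContext(MergeInfo)` constructor is assumed from the merge factory shown in the hunk, and the `MergeInfo` values are arbitrary.

```java
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.ReadAdvice;

final class WithReadAdviceSketch {
  static void demo() {
    IOContext searchContext = IOContext.READ; // ReadAdvice.NORMAL
    IOContext mergeContext =
        new IOContext(new MergeInfo(1000, 1 << 20, false, 1)); // ReadAdvice.SEQUENTIAL

    // A file format asking for RANDOM_PRELOAD only affects search contexts:
    assert searchContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD).readAdvice()
        == ReadAdvice.RANDOM_PRELOAD;
    // Merge contexts are returned unchanged, preserving sequential read-ahead:
    assert mergeContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD).readAdvice()
        == ReadAdvice.SEQUENTIAL;
  }
}
```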
MMapDirectory.java

```diff
@@ -85,7 +85,7 @@ public class MMapDirectory extends FSDirectory {

   /**
    * Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
-   * opening them if they use the {@link IOContext#PRELOAD} I/O context.
+   * opening them if they use the {@link ReadAdvice#RANDOM_PRELOAD} advice.
    */
   public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
       (filename, context) -> context.readAdvice() == ReadAdvice.RANDOM_PRELOAD;
```
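For directory users, the preload wiring itself is unchanged. A minimal sketch of enabling preloading via the (now `ReadAdvice`-driven) predicate above, with an arbitrary index path:

```java
import java.io.IOException;
import java.nio.file.Path;
import org.apache.lucene.store.MMapDirectory;

final class PreloadSketch {
  static MMapDirectory open(Path indexPath) throws IOException {
    MMapDirectory dir = new MMapDirectory(indexPath);
    // Files opened with a RANDOM_PRELOAD advice (formerly IOContext.PRELOAD) get memory-preloaded.
    dir.setPreload(MMapDirectory.BASED_ON_LOAD_IO_CONTEXT);
    return dir;
  }
}
```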
TestMMapDirectory.java

```diff
@@ -109,7 +109,8 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
       out.writeBytes(bytes, 0, bytes.length);
     }

-    try (final IndexInput in = dir.openInput("test", IOContext.RANDOM)) {
+    try (final IndexInput in =
+        dir.openInput("test", IOContext.READ.withReadAdvice(ReadAdvice.RANDOM))) {
       final byte[] readBytes = new byte[size];
       in.readBytes(readBytes, 0, readBytes.length);
       assertArrayEquals(bytes, readBytes);
```