mirror of https://github.com/apache/lucene.git
More consistently use a SEQUENTIAL ReadAdvice for merging. (#13229)
Merging `IOContext`s use a `SEQUENTIAL` `ReadAdvice`. However, some file formats hardcode `IOContext.LOAD` for some of their files, which discards the merging context and, in particular, its `SEQUENTIAL` `ReadAdvice`. This PR switches file formats to `ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)` so that merges keep using a `SEQUENTIAL` `ReadAdvice` while searches use a `RANDOM_PRELOAD` `ReadAdvice`. This is not a huge deal for `RANDOM_PRELOAD`, which is only used for very small files, but the change becomes more relevant for the new `RANDOM` `ReadAdvice`, as we would like merges to keep using a `SEQUENTIAL` `ReadAdvice`.
This commit is contained in:
parent 878d233bc1
commit a807772d41
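The core pattern of the change, as a minimal sketch rather than code from the commit: a file-format reader keeps the caller's `IOContext` and only adjusts its `ReadAdvice`, so a merge context retains `SEQUENTIAL` while a search context gets `RANDOM_PRELOAD`. The `ReadAdviceSketch` class, the `Directory` argument, and the file name are hypothetical placeholders.

```java
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;

final class ReadAdviceSketch {
  // Open a small, randomly accessed file the way the patched readers now do: derive the
  // advice from the caller's context instead of hardcoding a preload context.
  static IndexInput openSmallFile(Directory dir, String fileName, IOContext context)
      throws IOException {
    // For a search-time READ context this yields RANDOM_PRELOAD; for a MERGE context
    // withReadAdvice returns the context unchanged, preserving its SEQUENTIAL advice.
    return dir.openInput(fileName, context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
  }
}
```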
@@ -185,9 +185,10 @@ access the members using method calls instead of field accesses. Affected classe
 The `readOnce`, `load` and `random` flags on `IOContext` have been replaced with a new `ReadAdvice`
 enum.
 
-### IOContext.LOAD renamed to IOContext.PRELOAD
+### IOContext.LOAD removed
 
-`IOContext#LOAD` has been replaced with `IOContext#PRELOAD`.
+`IOContext#LOAD` has been removed; it should be replaced with
+`ioContext.withReadAdvice(ReadAdvice.RANDOM_PRELOAD)`.
 
 ## Migration from Lucene 9.0 to Lucene 9.1
 
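For application code that passed the removed constant directly, the migration note above amounts to deriving an equivalent context from `IOContext.READ`. A hedged sketch, where the directory, file name and `PreloadMigration` class are placeholders:

```java
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;

final class PreloadMigration {
  static IndexInput openPreloaded(Directory dir, String fileName) throws IOException {
    // Formerly: dir.openInput(fileName, IOContext.LOAD) (later IOContext.PRELOAD).
    IOContext preload = IOContext.READ.withReadAdvice(ReadAdvice.RANDOM_PRELOAD);
    return dir.openInput(fileName, preload);
  }
}
```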
@@ -39,8 +39,8 @@ import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
@@ -74,7 +74,9 @@ public class FSTTermsReader extends FieldsProducer {
             state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);
 
     this.postingsReader = postingsReader;
-    this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.PRELOAD);
+    this.fstTermsInput =
+        state.directory.openInput(
+            termsFileName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
 
     IndexInput in = this.fstTermsInput;
 
@@ -27,8 +27,8 @@ import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
 
@@ -60,7 +60,9 @@ public class Lucene90PointsReader extends PointsReader {
 
     boolean success = false;
     try {
-      indexIn = readState.directory.openInput(indexFileName, IOContext.PRELOAD);
+      indexIn =
+          readState.directory.openInput(
+              indexFileName, readState.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           Lucene90PointsFormat.INDEX_CODEC_NAME,
@@ -32,8 +32,8 @@ import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.store.ChecksumIndexInput;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CollectionUtil;
 import org.apache.lucene.util.IOUtils;
@@ -143,7 +143,9 @@ public final class Lucene90BlockTreeTermsReader extends FieldsProducer {
 
       String indexName =
           IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
-      indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);
+      indexIn =
+          state.directory.openInput(
+              indexName, state.context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
       CodecUtil.checkIndexHeader(
           indexIn,
           TERMS_INDEX_CODEC_NAME,
@@ -28,6 +28,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
+import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.packed.DirectMonotonicReader;
 
 final class FieldsIndexReader extends FieldsIndex {
@@ -52,7 +53,8 @@ final class FieldsIndexReader extends FieldsIndex {
       String extension,
       String codecName,
       byte[] id,
-      IndexInput metaIn)
+      IndexInput metaIn,
+      IOContext context)
       throws IOException {
     maxDoc = metaIn.readInt();
     blockShift = metaIn.readInt();
@@ -65,7 +67,9 @@ final class FieldsIndexReader extends FieldsIndex {
     maxPointer = metaIn.readLong();
 
     indexInput =
-        dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.PRELOAD);
+        dir.openInput(
+            IndexFileNames.segmentFileName(name, suffix, extension),
+            context.withReadAdvice(ReadAdvice.RANDOM_PRELOAD));
     boolean success = false;
     try {
       CodecUtil.checkIndexHeader(
@@ -163,7 +163,14 @@ public final class Lucene90CompressingStoredFieldsReader extends StoredFieldsRea
 
       FieldsIndexReader fieldsIndexReader =
           new FieldsIndexReader(
-              d, si.name, segmentSuffix, INDEX_EXTENSION, INDEX_CODEC_NAME, si.getId(), metaIn);
+              d,
+              si.name,
+              segmentSuffix,
+              INDEX_EXTENSION,
+              INDEX_CODEC_NAME,
+              si.getId(),
+              metaIn,
+              context);
       indexReader = fieldsIndexReader;
       maxPointer = fieldsIndexReader.getMaxPointer();
 
@@ -169,7 +169,8 @@ public final class Lucene90CompressingTermVectorsReader extends TermVectorsReade
               VECTORS_INDEX_EXTENSION,
               VECTORS_INDEX_CODEC_NAME,
               si.getId(),
-              metaIn);
+              metaIn,
+              context);
 
       this.indexReader = fieldsIndexReader;
       this.maxPointer = fieldsIndexReader.getMaxPointer();
@@ -49,10 +49,6 @@ public record IOContext(
 
   public static final IOContext READ = new IOContext(ReadAdvice.NORMAL);
 
-  public static final IOContext PRELOAD = new IOContext(ReadAdvice.RANDOM_PRELOAD);
-
-  public static final IOContext RANDOM = new IOContext(ReadAdvice.RANDOM);
-
   @SuppressWarnings("incomplete-switch")
   public IOContext {
     Objects.requireNonNull(context, "context must not be null");
@@ -88,4 +84,18 @@ public record IOContext(
     // Merges read input segments sequentially.
     this(Context.MERGE, mergeInfo, null, ReadAdvice.SEQUENTIAL);
   }
+
+  /**
+   * Return an updated {@link IOContext} that has the provided {@link ReadAdvice} if the {@link
+   * Context} is a {@link Context#READ} context, otherwise return this existing instance. This helps
+   * preserve a {@link ReadAdvice#SEQUENTIAL} advice for merging, which is always the right choice,
+   * while allowing {@link IndexInput}s open for searching to use arbitrary {@link ReadAdvice}s.
+   */
+  public IOContext withReadAdvice(ReadAdvice advice) {
+    if (context == Context.READ) {
+      return new IOContext(advice);
+    } else {
+      return this;
+    }
+  }
 }
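A small illustration of the new method's behavior, as a hedged sketch rather than a test from the commit: it assumes the `IOContext(MergeInfo)` constructor delegated to above is public and that `MergeInfo` still takes `(totalMaxDoc, estimatedMergeBytes, isExternal, mergeMaxNumSegments)`.

```java
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.MergeInfo;
import org.apache.lucene.store.ReadAdvice;

final class WithReadAdviceDemo {
  public static void main(String[] args) {
    // A search-time READ context adopts the requested advice.
    IOContext search = IOContext.READ.withReadAdvice(ReadAdvice.RANDOM_PRELOAD);
    System.out.println(search.readAdvice()); // RANDOM_PRELOAD

    // A MERGE context is returned unchanged, so merges keep reading sequentially.
    IOContext merge = new IOContext(new MergeInfo(1000, 1L << 20, false, 1));
    System.out.println(merge.withReadAdvice(ReadAdvice.RANDOM_PRELOAD).readAdvice()); // SEQUENTIAL
  }
}
```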
@@ -85,7 +85,7 @@ public class MMapDirectory extends FSDirectory {
 
   /**
    * Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
-   * opening them if they use the {@link IOContext#PRELOAD} I/O context.
+   * opening them if they use the {@link ReadAdvice#RANDOM_PRELOAD} advice.
    */
   public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
       (filename, context) -> context.readAdvice() == ReadAdvice.RANDOM_PRELOAD;
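Usage stays the same from an application's point of view; only the condition behind the predicate changed, since it now keys on the `RANDOM_PRELOAD` advice instead of the removed `IOContext.PRELOAD` constant. A short sketch (the index path is a placeholder):

```java
import java.io.IOException;
import java.nio.file.Paths;
import org.apache.lucene.store.MMapDirectory;

final class PreloadConfigExample {
  static MMapDirectory openWithPreload() throws IOException {
    MMapDirectory dir = new MMapDirectory(Paths.get("/path/to/index")); // placeholder path
    // Preload files whose opening IOContext carries the RANDOM_PRELOAD advice.
    dir.setPreload(MMapDirectory.BASED_ON_LOAD_IO_CONTEXT);
    return dir;
  }
}
```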
@@ -109,7 +109,8 @@ public class TestMMapDirectory extends BaseDirectoryTestCase {
       out.writeBytes(bytes, 0, bytes.length);
     }
 
-    try (final IndexInput in = dir.openInput("test", IOContext.RANDOM)) {
+    try (final IndexInput in =
+        dir.openInput("test", IOContext.READ.withReadAdvice(ReadAdvice.RANDOM))) {
       final byte[] readBytes = new byte[size];
       in.readBytes(readBytes, 0, readBytes.length);
       assertArrayEquals(bytes, readBytes);