Use `ReadAdvice#NORMAL` on files that have a forward-only access pattern. (#13450)

This applies to files where performing readahead could help:
 - Doc values data (`.dvd`)
 - Norms data (`.nvd`)
 - Docs and freqs in postings lists (`.doc`)
 - Points data (`.kdd`)

Other files (KNN vectors, stored fields, term vectors) keep using the `RANDOM`
read advice.
This commit is contained in:
Adrien Grand 2024-06-05 13:41:58 +02:00 committed by GitHub
parent e868b82045
commit 846aa2f8c3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 16 additions and 4 deletions

View File

@ -44,6 +44,7 @@ import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongValues;
@ -106,7 +107,10 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
String dataName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
this.data = state.directory.openInput(dataName, state.context);
// Doc-values have a forward-only access pattern, so pass ReadAdvice.NORMAL to perform
// readahead.
this.data =
state.directory.openInput(dataName, state.context.withReadAdvice(ReadAdvice.NORMAL));
boolean success = false;
try {
final int version2 =

View File

@ -33,6 +33,7 @@ import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.IOUtils;
/** Reader for {@link Lucene90NormsFormat} */
@ -80,7 +81,8 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
String dataName =
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
data = state.directory.openInput(dataName, state.context);
// Norms have a forward-only access pattern, so pass ReadAdvice.NORMAL to perform readahead.
data = state.directory.openInput(dataName, state.context.withReadAdvice(ReadAdvice.NORMAL));
boolean success = false;
try {
final int version2 =

View File

@ -71,7 +71,10 @@ public class Lucene90PointsReader extends PointsReader {
readState.segmentSuffix);
CodecUtil.retrieveChecksum(indexIn);
dataIn = readState.directory.openInput(dataFileName, readState.context);
// Points read whole ranges of bytes at once, so pass ReadAdvice.NORMAL to perform readahead.
dataIn =
readState.directory.openInput(
dataFileName, readState.context.withReadAdvice(ReadAdvice.NORMAL));
CodecUtil.checkIndexHeader(
dataIn,
Lucene90PointsFormat.DATA_CODEC_NAME,

View File

@ -41,6 +41,7 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SlowImpactsEnum;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.ReadAdvice;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.BytesRef;
@ -78,7 +79,9 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
IndexFileNames.segmentFileName(
state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.DOC_EXTENSION);
try {
docIn = state.directory.openInput(docName, state.context);
// Postings have a forward-only access pattern, so pass ReadAdvice.NORMAL to perform
// readahead.
docIn = state.directory.openInput(docName, state.context.withReadAdvice(ReadAdvice.NORMAL));
version =
CodecUtil.checkIndexHeader(
docIn,