From 846aa2f8c3f1e6a691f0448d90023b1bbb5b5ada Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 5 Jun 2024 13:41:58 +0200 Subject: [PATCH] Use `ReadAdvice#NORMAL` on files that have a forward-only access pattern. (#13450) This applies to files where performing readahead could help: - Doc values data (`.dvd`) - Norms data (`.nvd`) - Docs and freqs in postings lists (`.doc`) - Points data (`.kdd`) Other files (KNN vectors, stored fields, term vectors) keep using a `RANDOM` advice. --- .../lucene/codecs/lucene90/Lucene90DocValuesProducer.java | 6 +++++- .../lucene/codecs/lucene90/Lucene90NormsProducer.java | 4 +++- .../apache/lucene/codecs/lucene90/Lucene90PointsReader.java | 5 ++++- .../lucene/codecs/lucene99/Lucene99PostingsReader.java | 5 ++++- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java index f0b7b723432..16272c156b1 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java @@ -44,6 +44,7 @@ import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LongValues; @@ -106,7 +107,10 @@ final class Lucene90DocValuesProducer extends DocValuesProducer { String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); - this.data = state.directory.openInput(dataName, state.context); + // Doc-values have a forward-only access pattern, so pass ReadAdvice.NORMAL to perform + // readahead. + this.data = + state.directory.openInput(dataName, state.context.withReadAdvice(ReadAdvice.NORMAL)); boolean success = false; try { final int version2 = diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java index 0b996c5d952..4655091d57a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java @@ -33,6 +33,7 @@ import org.apache.lucene.internal.hppc.IntObjectHashMap; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.IOUtils; /** Reader for {@link Lucene90NormsFormat} */ @@ -80,7 +81,8 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable { String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); - data = state.directory.openInput(dataName, state.context); + // Norms have a forward-only access pattern, so pass ReadAdvice.NORMAL to perform readahead. + data = state.directory.openInput(dataName, state.context.withReadAdvice(ReadAdvice.NORMAL)); boolean success = false; try { final int version2 = diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java index 75805d798bb..d3f256cbf00 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsReader.java @@ -71,7 +71,10 @@ public class Lucene90PointsReader extends PointsReader { readState.segmentSuffix); CodecUtil.retrieveChecksum(indexIn); - dataIn = readState.directory.openInput(dataFileName, readState.context); + // Points read whole ranges of bytes at once, so pass ReadAdvice.NORMAL to perform readahead. + dataIn = + readState.directory.openInput( + dataFileName, readState.context.withReadAdvice(ReadAdvice.NORMAL)); CodecUtil.checkIndexHeader( dataIn, Lucene90PointsFormat.DATA_CODEC_NAME, diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java index 13353c42603..846a9f307eb 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java @@ -41,6 +41,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SlowImpactsEnum; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.BytesRef; @@ -78,7 +79,9 @@ public final class Lucene99PostingsReader extends PostingsReaderBase { IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, Lucene99PostingsFormat.DOC_EXTENSION); try { - docIn = state.directory.openInput(docName, state.context); + // Postings have a forward-only access pattern, so pass ReadAdvice.NORMAL to perform + // readahead. + docIn = state.directory.openInput(docName, state.context.withReadAdvice(ReadAdvice.NORMAL)); version = CodecUtil.checkIndexHeader( docIn,