Add prefetching for doc values and norms. (#13411)

This follows a similar approach to the one used for postings and only prefetches
the first page of data.
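
The pattern, repeated at every call site below, is a guarded one-byte prefetch at offset 0: `prefetch` is a page-granularity hint, so requesting a single byte is enough to pull in the whole page that contains it, and the `length() > 0` guard avoids issuing an out-of-bounds hint on an empty slice. A minimal self-contained sketch of the same pattern (the class name, directory path, and file name are illustrative, not from this patch):

    import java.nio.file.Path;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.IOContext;
    import org.apache.lucene.store.IndexInput;
    import org.apache.lucene.store.MMapDirectory;

    public class PrefetchSketch {
      public static void main(String[] args) throws Exception {
        try (Directory dir = new MMapDirectory(Path.of(args[0]));
            IndexInput in = dir.openInput(args[1], IOContext.DEFAULT)) {
          if (in.length() > 0) {
            // Hint that the page holding the first byte will be needed soon;
            // later pages are left to OS read-ahead once sequential reads begin.
            in.prefetch(0, 1);
          }
          // ... other setup work can run here while the page loads in the
          // background; the first actual read is then unlikely to block on I/O.
        }
      }
    }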

I verified that it works well for collectors such as `TopFieldCollector`:
`IndexSearcher` first pulls a `LeafCollector`, then a `BulkScorer`, and only
then starts feeding the `BulkScorer` into the `LeafCollector`. As a result, the
background I/O for the `LeafCollector`, which prefetches the first page of doc
values, runs in parallel with the background I/O for the `BulkScorer`.
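
A simplified sketch of that per-leaf ordering (a hypothetical helper; the real `IndexSearcher` code has more bookkeeping, e.g. around early termination):

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.LeafReaderContext;
    import org.apache.lucene.search.BulkScorer;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.LeafCollector;
    import org.apache.lucene.search.Weight;

    class SearchSketch {
      static void searchLeaves(IndexReader reader, Weight weight, Collector collector)
          throws IOException {
        for (LeafReaderContext ctx : reader.leaves()) {
          // Pulling the LeafCollector first lets doc-values readers issue their
          // prefetch() hints, kicking off background I/O for the first pages.
          LeafCollector leafCollector = collector.getLeafCollector(ctx);
          // Building the BulkScorer performs its own setup I/O (e.g. postings
          // prefetch), which overlaps with the doc-values I/O above.
          BulkScorer scorer = weight.bulkScorer(ctx);
          if (scorer != null) {
            // By the time docs are scored and collected, the prefetched pages
            // are likely already cached.
            scorer.score(leafCollector, ctx.reader().getLiveDocs());
          }
        }
      }
    }
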
Adrien Grand, 2024-06-05 13:43:14 +02:00 (committed by GitHub)
parent 846aa2f8c3, commit 05b4639c0c
5 changed files with 86 additions and 1 deletion

lucene/core/src/java/org/apache/lucene/codecs/lucene90/IndexedDISI.java

@@ -359,6 +359,14 @@ public final class IndexedDISI extends DocIdSetIterator {
     this.slice = blockSlice;
     this.jumpTable = jumpTable;
+    // Prefetch the first pages of data. Following pages are expected to get prefetched through
+    // read-ahead.
+    if (slice.length() > 0) {
+      slice.prefetch(0, 1);
+    }
+    if (jumpTable != null && jumpTable.length() > 0) {
+      jumpTable.prefetch(0, 1);
+    }
     this.jumpTableEntryCount = jumpTableEntryCount;
     this.denseRankPower = denseRankPower;
     final int rankIndexShift = denseRankPower - 7;

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90DocValuesProducer.java

@@ -497,6 +497,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     } else {
       final RandomAccessInput slice =
           data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
+      // Prefetch the first page of data. Following pages are expected to get prefetched through
+      // read-ahead.
+      if (slice.length() > 0) {
+        slice.prefetch(0, 1);
+      }
       if (entry.blockShift >= 0) {
         // dense but split into blocks of different bits per value
         return new DenseNumericDocValues(maxDoc) {
@@ -558,6 +563,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     } else {
       final RandomAccessInput slice =
           data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
+      // Prefetch the first page of data. Following pages are expected to get prefetched through
+      // read-ahead.
+      if (slice.length() > 0) {
+        slice.prefetch(0, 1);
+      }
       if (entry.blockShift >= 0) {
         // sparse and split into blocks of different bits per value
         return new SparseNumericDocValues(disi) {
@@ -613,6 +623,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     } else {
       final RandomAccessInput slice =
           data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
+      // Prefetch the first page of data. Following pages are expected to get prefetched through
+      // read-ahead.
+      if (slice.length() > 0) {
+        slice.prefetch(0, 1);
+      }
       if (entry.blockShift >= 0) {
         return new LongValues() {
           final VaryingBPVReader vBPVReader = new VaryingBPVReader(entry, slice);
@@ -743,6 +758,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     }
     final IndexInput bytesSlice = data.slice("fixed-binary", entry.dataOffset, entry.dataLength);
+    // Prefetch the first page of data. Following pages are expected to get prefetched through
+    // read-ahead.
+    if (bytesSlice.length() > 0) {
+      bytesSlice.prefetch(0, 1);
+    }
     if (entry.docsWithFieldOffset == -1) {
       // dense
@@ -763,6 +783,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
       // variable length
       final RandomAccessInput addressesData =
           this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
+      // Prefetch the first page of data. Following pages are expected to get prefetched through
+      // read-ahead.
+      if (addressesData.length() > 0) {
+        addressesData.prefetch(0, 1);
+      }
       final LongValues addresses =
           DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData, merging);
       return new DenseBinaryDocValues(maxDoc) {
@@ -805,6 +830,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
       // variable length
       final RandomAccessInput addressesData =
           this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
+      // Prefetch the first page of data. Following pages are expected to get prefetched through
+      // read-ahead.
+      if (addressesData.length() > 0) {
+        addressesData.prefetch(0, 1);
+      }
       final LongValues addresses =
           DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
       return new SparseBinaryDocValues(disi) {
@@ -842,6 +872,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     final RandomAccessInput slice =
         data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
+    // Prefetch the first page of data. Following pages are expected to get prefetched through
+    // read-ahead.
+    if (slice.length() > 0) {
+      slice.prefetch(0, 1);
+    }
     final LongValues values =
         getDirectReaderInstance(slice, ordsEntry.bitsPerValue, 0L, ordsEntry.numValues);
@@ -1309,6 +1344,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     final RandomAccessInput addressesInput =
         data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
+    // Prefetch the first page of data. Following pages are expected to get prefetched through
+    // read-ahead.
+    if (addressesInput.length() > 0) {
+      addressesInput.prefetch(0, 1);
+    }
     final LongValues addresses =
         DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput, merging);
@@ -1452,11 +1492,21 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
     final RandomAccessInput addressesInput =
         data.randomAccessSlice(ordsEntry.addressesOffset, ordsEntry.addressesLength);
+    // Prefetch the first page of data. Following pages are expected to get prefetched through
+    // read-ahead.
+    if (addressesInput.length() > 0) {
+      addressesInput.prefetch(0, 1);
+    }
     final LongValues addresses =
         DirectMonotonicReader.getInstance(ordsEntry.addressesMeta, addressesInput);
     final RandomAccessInput slice =
         data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
+    // Prefetch the first page of data. Following pages are expected to get prefetched through
+    // read-ahead.
+    if (slice.length() > 0) {
+      slice.prefetch(0, 1);
+    }
     final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue);
     if (ordsEntry.docsWithFieldOffset == -1) { // dense
@@ -1655,6 +1705,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
             ? null
             : data.randomAccessSlice(
                 entry.valueJumpTableOffset, data.length() - entry.valueJumpTableOffset);
+    if (rankSlice != null && rankSlice.length() > 0) {
+      // Prefetch the first page of data. Following pages are expected to get prefetched through
+      // read-ahead.
+      rankSlice.prefetch(0, 1);
+    }
     shift = entry.blockShift;
     mul = entry.gcd;
     mask = (1 << shift) - 1;

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90NormsProducer.java

@@ -255,6 +255,11 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
       if (merging) {
         dataInputs.put(field.number, slice);
       }
+      // Prefetch the first page of data. Following pages are expected to get prefetched through
+      // read-ahead.
+      if (slice.length() > 0) {
+        slice.prefetch(0, 1);
+      }
     }
     return slice;
   }
@@ -330,7 +335,7 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
       @Override
       public long length() {
-        throw new UnsupportedOperationException("Unused by IndexedDISI");
+        return inF.length();
       }
       @Override
@@ -342,6 +347,11 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
       public void close() throws IOException {
         throw new UnsupportedOperationException("Unused by IndexedDISI");
       }
+
+      @Override
+      public void prefetch(long offset, long length) throws IOException {
+        // Not delegating to the wrapped instance on purpose. This is only used for merging.
+      }
     };
   }

lucene/core/src/java/org/apache/lucene/store/IndexInput.java

@@ -184,6 +184,11 @@ public abstract class IndexInput extends DataInput implements Closeable {
         return slice.readLong();
       }
+
+      @Override
+      public void prefetch(long offset, long length) throws IOException {
+        slice.prefetch(offset, length);
+      }
       @Override
       public String toString() {
         return "RandomAccessInput(" + IndexInput.this.toString() + ")";
lucene/core/src/java/org/apache/lucene/store/RandomAccessInput.java

@@ -70,4 +70,11 @@ public interface RandomAccessInput {
    * @see BitUtil#VH_LE_LONG
    */
   long readLong(long pos) throws IOException;
+
+  /**
+   * Prefetch data in the background.
+   *
+   * @see IndexInput#prefetch
+   */
+  default void prefetch(long offset, long length) throws IOException {}
 }
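
As a usage note for the new default method: because it defaults to a no-op, callers can issue the hint unconditionally against any `RandomAccessInput` implementation, guarding only against empty slices. A hypothetical caller mirroring the producer-side pattern added above (the `data`, `offset`, and `length` names are illustrative, and the slice is assumed to hold at least `Long.BYTES` bytes):

    // Assumes an already-open IndexInput `data` and slice bounds read from metadata.
    static long readFirstLong(IndexInput data, long offset, long length) throws IOException {
      RandomAccessInput slice = data.randomAccessSlice(offset, length);
      if (slice.length() > 0) {
        // One byte is enough to pull in the page that holds it; implementations
        // that keep the default prefetch() simply ignore the hint.
        slice.prefetch(0, 1);
      }
      return slice.readLong(0); // likely served from the page cache by now
    }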