mirror of https://github.com/apache/lucene.git
Add prefetching for doc values and norms. (#13411)
This follows a similar approach to postings and only prefetches the first page of data. I verified that it works well for collectors such as `TopFieldCollector`, as `IndexSearcher` first pulls a `LeafCollector`, then a `BulkScorer`, and only then starts feeding the `BulkScorer` into the `LeafCollector`. So the background I/O for the `LeafCollector`, which will prefetch the first page of doc values, and the background I/O for the `BulkScorer` will run in parallel.
This commit is contained in:
parent
846aa2f8c3
commit
05b4639c0c
|
@ -359,6 +359,14 @@ public final class IndexedDISI extends DocIdSetIterator {
|
|||
|
||||
this.slice = blockSlice;
|
||||
this.jumpTable = jumpTable;
|
||||
// Prefetch the first pages of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (slice.length() > 0) {
|
||||
slice.prefetch(0, 1);
|
||||
}
|
||||
if (jumpTable != null && jumpTable.length() > 0) {
|
||||
jumpTable.prefetch(0, 1);
|
||||
}
|
||||
this.jumpTableEntryCount = jumpTableEntryCount;
|
||||
this.denseRankPower = denseRankPower;
|
||||
final int rankIndexShift = denseRankPower - 7;
|
||||
|
|
|
@ -497,6 +497,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
} else {
|
||||
final RandomAccessInput slice =
|
||||
data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (slice.length() > 0) {
|
||||
slice.prefetch(0, 1);
|
||||
}
|
||||
if (entry.blockShift >= 0) {
|
||||
// dense but split into blocks of different bits per value
|
||||
return new DenseNumericDocValues(maxDoc) {
|
||||
|
@ -558,6 +563,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
} else {
|
||||
final RandomAccessInput slice =
|
||||
data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (slice.length() > 0) {
|
||||
slice.prefetch(0, 1);
|
||||
}
|
||||
if (entry.blockShift >= 0) {
|
||||
// sparse and split into blocks of different bits per value
|
||||
return new SparseNumericDocValues(disi) {
|
||||
|
@ -613,6 +623,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
} else {
|
||||
final RandomAccessInput slice =
|
||||
data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (slice.length() > 0) {
|
||||
slice.prefetch(0, 1);
|
||||
}
|
||||
if (entry.blockShift >= 0) {
|
||||
return new LongValues() {
|
||||
final VaryingBPVReader vBPVReader = new VaryingBPVReader(entry, slice);
|
||||
|
@ -743,6 +758,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
}
|
||||
|
||||
final IndexInput bytesSlice = data.slice("fixed-binary", entry.dataOffset, entry.dataLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (bytesSlice.length() > 0) {
|
||||
bytesSlice.prefetch(0, 1);
|
||||
}
|
||||
|
||||
if (entry.docsWithFieldOffset == -1) {
|
||||
// dense
|
||||
|
@ -763,6 +783,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
// variable length
|
||||
final RandomAccessInput addressesData =
|
||||
this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (addressesData.length() > 0) {
|
||||
addressesData.prefetch(0, 1);
|
||||
}
|
||||
final LongValues addresses =
|
||||
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData, merging);
|
||||
return new DenseBinaryDocValues(maxDoc) {
|
||||
|
@ -805,6 +830,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
// variable length
|
||||
final RandomAccessInput addressesData =
|
||||
this.data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (addressesData.length() > 0) {
|
||||
addressesData.prefetch(0, 1);
|
||||
}
|
||||
final LongValues addresses =
|
||||
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesData);
|
||||
return new SparseBinaryDocValues(disi) {
|
||||
|
@ -842,6 +872,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
final RandomAccessInput slice =
|
||||
data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (slice.length() > 0) {
|
||||
slice.prefetch(0, 1);
|
||||
}
|
||||
final LongValues values =
|
||||
getDirectReaderInstance(slice, ordsEntry.bitsPerValue, 0L, ordsEntry.numValues);
|
||||
|
||||
|
@ -1309,6 +1344,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
final RandomAccessInput addressesInput =
|
||||
data.randomAccessSlice(entry.addressesOffset, entry.addressesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (addressesInput.length() > 0) {
|
||||
addressesInput.prefetch(0, 1);
|
||||
}
|
||||
final LongValues addresses =
|
||||
DirectMonotonicReader.getInstance(entry.addressesMeta, addressesInput, merging);
|
||||
|
||||
|
@ -1452,11 +1492,21 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
|
||||
final RandomAccessInput addressesInput =
|
||||
data.randomAccessSlice(ordsEntry.addressesOffset, ordsEntry.addressesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (addressesInput.length() > 0) {
|
||||
addressesInput.prefetch(0, 1);
|
||||
}
|
||||
final LongValues addresses =
|
||||
DirectMonotonicReader.getInstance(ordsEntry.addressesMeta, addressesInput);
|
||||
|
||||
final RandomAccessInput slice =
|
||||
data.randomAccessSlice(ordsEntry.valuesOffset, ordsEntry.valuesLength);
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (slice.length() > 0) {
|
||||
slice.prefetch(0, 1);
|
||||
}
|
||||
final LongValues values = DirectReader.getInstance(slice, ordsEntry.bitsPerValue);
|
||||
|
||||
if (ordsEntry.docsWithFieldOffset == -1) { // dense
|
||||
|
@ -1655,6 +1705,11 @@ final class Lucene90DocValuesProducer extends DocValuesProducer {
|
|||
? null
|
||||
: data.randomAccessSlice(
|
||||
entry.valueJumpTableOffset, data.length() - entry.valueJumpTableOffset);
|
||||
if (rankSlice != null && rankSlice.length() > 0) {
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
rankSlice.prefetch(0, 1);
|
||||
}
|
||||
shift = entry.blockShift;
|
||||
mul = entry.gcd;
|
||||
mask = (1 << shift) - 1;
|
||||
|
|
|
@ -255,6 +255,11 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
|
|||
if (merging) {
|
||||
dataInputs.put(field.number, slice);
|
||||
}
|
||||
// Prefetch the first page of data. Following pages are expected to get prefetched through
|
||||
// read-ahead.
|
||||
if (slice.length() > 0) {
|
||||
slice.prefetch(0, 1);
|
||||
}
|
||||
}
|
||||
return slice;
|
||||
}
|
||||
|
@ -330,7 +335,7 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
|
|||
|
||||
@Override
|
||||
public long length() {
|
||||
throw new UnsupportedOperationException("Unused by IndexedDISI");
|
||||
return inF.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -342,6 +347,11 @@ final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
|
|||
public void close() throws IOException {
|
||||
throw new UnsupportedOperationException("Unused by IndexedDISI");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prefetch(long offset, long length) throws IOException {
|
||||
// Not delegating to the wrapped instance on purpose. This is only used for merging.
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -184,6 +184,11 @@ public abstract class IndexInput extends DataInput implements Closeable {
|
|||
return slice.readLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void prefetch(long offset, long length) throws IOException {
|
||||
slice.prefetch(offset, length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "RandomAccessInput(" + IndexInput.this.toString() + ")";
|
||||
|
|
|
@ -70,4 +70,11 @@ public interface RandomAccessInput {
|
|||
* @see BitUtil#VH_LE_LONG
|
||||
*/
|
||||
long readLong(long pos) throws IOException;
|
||||
|
||||
/**
|
||||
* Prefetch data in the background.
|
||||
*
|
||||
* @see IndexInput#prefetch
|
||||
*/
|
||||
default void prefetch(long offset, long length) throws IOException {}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue