Use IndexInput#prefetch for postings, skip data and impacts (#13364)

This uses the `IndexInput#prefetch` API for postings. This relies on heuristics, as we don't know ahead of time what data we will need from a postings list: - Postings lists are prefetched entirely when they are short (< 16kB). - Impacts enums also prefetch the first page of skip data. - Postings enums prefetc skip data on the first call to advance(). Positions, offsets and payloads are never prefetched. Putting the `IndexInput#prefetch` call in `TermsEnum#postings` and `TermsEnum#impacts` works well because `BooleanQuery` will first create postings/impacts enums for all clauses before it starts unioning/intersecting them. This allows the prefetching logic to run in parallel across all clauses of the same query on the same segment.
2024-05-17 09:07:07 +02:00 · 2024-05-17 09:07:07 +02:00 · c5331df1c4
parent 3d671a0fbe
commit c5331df1c4
1 changed files with 102 additions and 41 deletions
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java
@ -53,6 +53,9 @@ import org.apache.lucene.util.IOUtils;
 */
 public final class Lucene99PostingsReader extends PostingsReaderBase {

+  /** Maximum byte size of a postings list to be fully prefetched. */
+  private static final int MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH = 16_384;
+
  private final IndexInput docIn;
  private final IndexInput posIn;
  private final IndexInput payIn;
@ -321,6 +324,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {

    private Lucene99SkipReader skipper;
    private boolean skipped;
+    private boolean prefetchedSkipData;

    final IndexInput startDocIn;

@ -393,7 +397,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
          // lazy init
          docIn = startDocIn.clone();
        }
-        docIn.seek(docTermStartFP);
+        seekAndPrefetchPostings(docIn, termState);
      }

      doc = -1;
@ -409,6 +413,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
      nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
+      prefetchedSkipData = false;
      return this;
    }

@ -501,44 +506,52 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
    public int advance(int target) throws IOException {
      // current skip docID < docIDs generated from current buffer <= next skip docID
      // we don't need to skip if target is buffered already
-      if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
+      if (docFreq > BLOCK_SIZE) {
+        if (target <= nextSkipDoc) {
+          // We don't need skip data yet, but we have evidence that advance() is called, so let's
+          // prefetch skip data in the background.
+          if (prefetchedSkipData == false) {
+            prefetchSkipData(docIn, docTermStartFP, skipOffset);
+            prefetchedSkipData = true;
+          }
+        } else {
+          if (skipper == null) {
+            // Lazy init: first time this enum has ever been used for skipping
+            skipper =
+                new Lucene99SkipReader(
+                    docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
+          }

-        if (skipper == null) {
-          // Lazy init: first time this enum has ever been used for skipping
-          skipper =
-              new Lucene99SkipReader(
-                  docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
+          if (!skipped) {
+            assert skipOffset != -1;
+            // This is the first time this enum has skipped
+            // since reset() was called; load the skip data:
+            skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq);
+            skipped = true;
+          }
+
+          // always plus one to fix the result, since skip position in Lucene99SkipReader
+          // is a little different from MultiLevelSkipListReader
+          final int newDocUpto = skipper.skipTo(target) + 1;
+
+          if (newDocUpto >= blockUpto) {
+            // Skipper moved
+            assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+            blockUpto = newDocUpto;
+
+            // Force to read next block
+            docBufferUpto = BLOCK_SIZE;
+            accum = skipper.getDoc(); // actually, this is just lastSkipEntry
+            docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
+            // even if freqBuffer were not read from the previous block, we will mark them as read,
+            // as we don't need to skip the previous block freqBuffer in refillDocs,
+            // as we have already positioned docIn where in needs to be.
+            isFreqsRead = true;
+          }
+          // next time we call advance, this is used to
+          // foresee whether skipper is necessary.
+          nextSkipDoc = skipper.getNextSkipDoc();
        }
-
-        if (!skipped) {
-          assert skipOffset != -1;
-          // This is the first time this enum has skipped
-          // since reset() was called; load the skip data:
-          skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq);
-          skipped = true;
-        }
-
-        // always plus one to fix the result, since skip position in Lucene99SkipReader
-        // is a little different from MultiLevelSkipListReader
-        final int newDocUpto = skipper.skipTo(target) + 1;
-
-        if (newDocUpto >= blockUpto) {
-          // Skipper moved
-          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
-          blockUpto = newDocUpto;
-
-          // Force to read next block
-          docBufferUpto = BLOCK_SIZE;
-          accum = skipper.getDoc(); // actually, this is just lastSkipEntry
-          docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
-          // even if freqBuffer were not read from the previous block, we will mark them as read,
-          // as we don't need to skip the previous block freqBuffer in refillDocs,
-          // as we have already positioned docIn where in needs to be.
-          isFreqsRead = true;
-        }
-        // next time we call advance, this is used to
-        // foresee whether skipper is necessary.
-        nextSkipDoc = skipper.getNextSkipDoc();
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
@ -594,6 +607,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {

    private Lucene99SkipReader skipper;
    private boolean skipped;
+    private boolean prefetchedSkipData;

    final IndexInput startDocIn;

@ -715,7 +729,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
          // lazy init
          docIn = startDocIn.clone();
        }
-        docIn.seek(docTermStartFP);
+        seekAndPrefetchPostings(docIn, termState);
      }
      posPendingFP = posTermStartFP;
      payPendingFP = payTermStartFP;
@ -741,6 +755,7 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
      }
      docBufferUpto = BLOCK_SIZE;
      skipped = false;
+      prefetchedSkipData = false;
      return this;
    }

@ -902,6 +917,13 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
          payloadByteUpto = skipper.getPayloadByteUpto();
        }
        nextSkipDoc = skipper.getNextSkipDoc();
+      } else {
+        // We don't need skip data yet, but we have evidence that advance() is used, so prefetch it
+        // in the background.
+        if (prefetchedSkipData == false) {
+          prefetchSkipData(docIn, docTermStartFP, skipOffset);
+          prefetchedSkipData = true;
+        }
      }
      if (docBufferUpto == BLOCK_SIZE) {
        refillDocs();
@ -1097,7 +1119,9 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
      this.docIn = Lucene99PostingsReader.this.docIn.clone();

      docFreq = termState.docFreq;
-      docIn.seek(termState.docStartFP);
+      seekAndPrefetchPostings(docIn, termState);
+      // Impacts almost certainly need skip data
+      prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);

      doc = -1;
      accum = 0;
@ -1318,7 +1342,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      totalTermFreq = termState.totalTermFreq;
-      docIn.seek(docTermStartFP);
+      seekAndPrefetchPostings(docIn, termState);
+      prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);
      posPendingFP = posTermStartFP;
      posPendingCount = 0;
      if (termState.totalTermFreq < BLOCK_SIZE) {
@ -1672,7 +1697,8 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
      posTermStartFP = termState.posStartFP;
      payTermStartFP = termState.payStartFP;
      totalTermFreq = termState.totalTermFreq;
-      docIn.seek(docTermStartFP);
+      seekAndPrefetchPostings(docIn, termState);
+      prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);
      posPendingFP = posTermStartFP;
      payPendingFP = payTermStartFP;
      posPendingCount = 0;
@ -2049,6 +2075,41 @@ public final class Lucene99PostingsReader extends PostingsReaderBase {
    }
  }

+  private void seekAndPrefetchPostings(IndexInput docIn, IntBlockTermState state)
+      throws IOException {
+    if (docIn.getFilePointer() != state.docStartFP) {
+      // Don't prefetch if the input is already positioned at the right offset, which suggests that
+      // the caller is streaming the entire inverted index (e.g. for merging), let the read-ahead
+      // logic do its work instead. Note that this heuristic doesn't work for terms that have skip
+      // data, since skip data is stored after the last term, but handling all terms that have <128
+      // docs is a good start already.
+      docIn.seek(state.docStartFP);
+      if (state.skipOffset < 0) {
+        // This postings list is very short as it doesn't have skip data, prefetch the page that
+        // holds the first byte of the postings list.
+        docIn.prefetch(state.docStartFP, 1);
+      } else if (state.skipOffset <= MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH) {
+        // This postings list is short as it fits on a few pages, prefetch it all, plus one byte to
+        // make sure to include some skip data.
+        docIn.prefetch(state.docStartFP, state.skipOffset + 1);
+      } else {
+        // Default case: prefetch the page that holds the first byte of postings. We'll prefetch
+        // skip data when we have evidence that it is used.
+        docIn.prefetch(state.docStartFP, 1);
+      }
+    }
+    // Note: we don't prefetch positions or offsets, which are less likely to be needed.
+  }
+
+  private void prefetchSkipData(IndexInput docIn, long docStartFP, long skipOffset)
+      throws IOException {
+    if (skipOffset > MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH) {
+      // If skipOffset is less than this value, skip data was already prefetched when doing
+      // #seekAndPrefetchPostings
+      docIn.prefetch(docStartFP + skipOffset, 1);
+    }
+  }
+
  @Override
  public void checkIntegrity() throws IOException {
    if (docIn != null) {