From b2a10e3643c86a37ecb3f74b7d9ee491743de48b Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Mon, 2 Dec 2024 23:26:04 +0100
Subject: [PATCH] Speed up PostingsEnum when reading positions. (#14032)

This PR changes the following:
 - As much work as possible is moved from `nextDoc()`/`advance()` to
   `nextPosition()`. This way, the overhead of reading positions is only paid
   when all query terms agree on a candidate.
 - Frequencies are decoded lazily. This helps when a block of doc IDs must be
   decoded but clauses never agree on a common candidate match within it: the
   frequencies of that block are then never decoded.
 - A few other minor optimizations.
---
 lucene/CHANGES.txt                            |   3 +
 .../lucene101/Lucene101PostingsReader.java    | 297 +++++++++++-------
 2 files changed, 187 insertions(+), 113 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7a788fb585c..ad39c8b3451 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -119,6 +119,9 @@ Optimizations
 * GITHUB#14023: Make JVM inlining decisions more predictable in our main
   queries. (Adrien Grand)
 
+* GITHUB#14032: Speed up PostingsEnum when positions are requested.
+  (Adrien Grand)
+
 Bug Fixes
 ---------------------
 * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java
index 9e79aaf71e1..d879a58b4ab 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene101/Lucene101PostingsReader.java
@@ -638,9 +638,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
     final boolean indexHasPayloads;
     final boolean indexHasOffsetsOrPayloads;
 
-    private int freq; // freq we last read
+    private long freqFP; // offset of the freq block
+
     private int position; // current position
 
+    // value of docBufferUpto on the last doc ID when positions have been read
+    private int posDocBufferUpto;
+
     // how many positions "behind" we are; nextPosition must
     // skip these to "catch up":
     private int posPendingCount;
@@ -662,6 +666,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
 
     private boolean needsOffsets; // true if we actually need offsets
     private boolean needsPayloads; // true if we actually need payloads
+    private boolean needsPayloadsOrOffsets;
 
     public EverythingEnum(FieldInfo fieldInfo) throws IOException {
       super(fieldInfo);
@@ -745,8 +750,11 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
         lastPosBlockFP = posTermStartFP + termState.lastPosBlockOffset;
       }
 
-      this.needsOffsets = PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
-      this.needsPayloads = PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
+      this.needsOffsets =
+          indexHasOffsets && PostingsEnum.featureRequested(flags, PostingsEnum.OFFSETS);
+      this.needsPayloads =
+          indexHasPayloads && PostingsEnum.featureRequested(flags, PostingsEnum.PAYLOADS);
+      this.needsPayloadsOrOffsets = this.needsPayloads || this.needsOffsets;
 
       level1BlockPosUpto = 0;
       level1BlockPayUpto = 0;
@@ -758,8 +766,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
     }
 
     @Override
-    public int freq() {
-      return freq;
+    public int freq() throws IOException {
+      if (freqFP != -1) { // -1 means freqBuffer is already populated for this block
+        docIn.seek(freqFP);
+        pforUtil.decode(docInUtil, freqBuffer);
+        freqFP = -1; // decode at most once per block
+      }
+      return freqBuffer[docBufferUpto - 1]; // docBufferUpto points one past the current doc
     }
 
     private void refillDocs() throws IOException {
@@ -768,11 +781,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
 
       if (left >= BLOCK_SIZE) {
         forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer);
-        pforUtil.decode(docInUtil, freqBuffer);
+        freqFP = docIn.getFilePointer();
+        PForUtil.skip(docIn);
         docCountUpto += BLOCK_SIZE;
       } else if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = (int) totalTermFreq;
+        freqFP = -1;
         docBuffer[1] = NO_MORE_DOCS;
         docCountUpto++;
         docBufferSize = 1;
@@ -781,11 +796,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
         docBuffer[left] = NO_MORE_DOCS;
+        freqFP = -1;
         docCountUpto += left;
         docBufferSize = left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
+      posDocBufferUpto = 0;
       assert docBuffer[docBufferSize] == NO_MORE_DOCS;
     }
 
@@ -846,6 +863,8 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
           payloadByteUpto = level0BlockPayUpto;
         }
         posBufferUpto = BLOCK_SIZE;
+      } else {
+        posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE);
       }
 
       if (docFreq - docCountUpto >= BLOCK_SIZE) {
@@ -875,34 +894,23 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
       }
 
       this.doc = docBuffer[docBufferUpto];
-      this.freq = freqBuffer[docBufferUpto];
       docBufferUpto++;
-      posPendingCount += freq;
-      position = 0;
-      lastStartOffset = 0;
       return doc;
     }
 
     private void skipLevel0To(int target) throws IOException {
+      long posFP;
+      int posUpto;
+      long payFP;
+      int payUpto;
+
       while (true) {
         prevDocID = level0LastDocID;
 
-        // If nextBlockPosFP is less than the current FP, it means that the block of positions for
-        // the first docs of the next block are already decoded. In this case we just accumulate
-        // frequencies into posPendingCount instead of seeking backwards and decoding the same pos
-        // block again.
-        if (level0PosEndFP >= posIn.getFilePointer()) {
-          posIn.seek(level0PosEndFP);
-          posPendingCount = level0BlockPosUpto;
-          if (indexHasOffsetsOrPayloads) {
-            assert level0PayEndFP >= payIn.getFilePointer();
-            payIn.seek(level0PayEndFP);
-            payloadByteUpto = level0BlockPayUpto;
-          }
-          posBufferUpto = BLOCK_SIZE;
-        } else {
-          posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE);
-        }
+        posFP = level0PosEndFP;
+        posUpto = level0BlockPosUpto;
+        payFP = level0PayEndFP;
+        payUpto = level0BlockPayUpto;
 
         if (docFreq - docCountUpto >= BLOCK_SIZE) {
           docIn.readVLong(); // skip0 num bytes
@@ -931,6 +939,23 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
           break;
         }
       }
+
+      // If posFP is less than the current FP, it means that the block of positions for the
+      // first docs of the next block is already decoded. In this case we just accumulate
+      // frequencies into posPendingCount instead of seeking backwards and decoding the same pos
+      // block again.
+      if (posFP >= posIn.getFilePointer()) {
+        posIn.seek(posFP);
+        posPendingCount = posUpto;
+        if (indexHasOffsetsOrPayloads) {
+          assert payFP >= payIn.getFilePointer(); // check the FP that is actually seeked to
+          payIn.seek(payFP);
+          payloadByteUpto = payUpto;
+        }
+        posBufferUpto = BLOCK_SIZE;
+      } else {
+        posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE);
+      }
     }
 
     @Override
@@ -947,16 +972,12 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
       }
 
       int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
-      posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
-      this.freq = freqBuffer[next];
       this.docBufferUpto = next + 1;
-      position = 0;
-      lastStartOffset = 0;
 
       return this.doc = docBuffer[next];
     }
 
-    private void skipPositions() throws IOException {
+    private void skipPositions(int freq) throws IOException {
       // Skip positions now:
       int toSkip = posPendingCount - freq;
       // if (DEBUG) {
@@ -1003,41 +1024,45 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
       lastStartOffset = 0;
     }
 
+    private void refillLastPositionBlock() throws IOException {
+      final int count = (int) (totalTermFreq % BLOCK_SIZE); // size of the last, partial block
+      int payloadLength = 0; // carried over between entries until a new length is read
+      int offsetLength = 0; // carried over between entries until a new length is read
+      payloadByteUpto = 0; // used as the write cursor into payloadBytes while filling
+      for (int i = 0; i < count; i++) {
+        int code = posIn.readVInt();
+        if (indexHasPayloads) {
+          if ((code & 1) != 0) { // low bit set: a new payload length follows
+            payloadLength = posIn.readVInt();
+          }
+          payloadLengthBuffer[i] = payloadLength;
+          posDeltaBuffer[i] = code >>> 1; // remaining bits hold the position delta
+          if (payloadLength != 0) {
+            if (payloadByteUpto + payloadLength > payloadBytes.length) {
+              payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
+            }
+            posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
+            payloadByteUpto += payloadLength;
+          }
+        } else {
+          posDeltaBuffer[i] = code; // no payload flag: the vInt is the position delta itself
+        }
+
+        if (indexHasOffsets) {
+          int deltaCode = posIn.readVInt();
+          if ((deltaCode & 1) != 0) { // low bit set: a new offset length follows
+            offsetLength = posIn.readVInt();
+          }
+          offsetStartDeltaBuffer[i] = deltaCode >>> 1;
+          offsetLengthBuffer[i] = offsetLength;
+        }
+      }
+      payloadByteUpto = 0; // rewind: payloads are then consumed from the start of payloadBytes
+    }
+
     private void refillPositions() throws IOException {
       if (posIn.getFilePointer() == lastPosBlockFP) {
-        final int count = (int) (totalTermFreq % BLOCK_SIZE);
-        int payloadLength = 0;
-        int offsetLength = 0;
-        payloadByteUpto = 0;
-        for (int i = 0; i < count; i++) {
-          int code = posIn.readVInt();
-          if (indexHasPayloads) {
-            if ((code & 1) != 0) {
-              payloadLength = posIn.readVInt();
-            }
-            payloadLengthBuffer[i] = payloadLength;
-            posDeltaBuffer[i] = code >>> 1;
-            if (payloadLength != 0) {
-              if (payloadByteUpto + payloadLength > payloadBytes.length) {
-                payloadBytes = ArrayUtil.grow(payloadBytes, payloadByteUpto + payloadLength);
-              }
-              posIn.readBytes(payloadBytes, payloadByteUpto, payloadLength);
-              payloadByteUpto += payloadLength;
-            }
-          } else {
-            posDeltaBuffer[i] = code;
-          }
-
-          if (indexHasOffsets) {
-            int deltaCode = posIn.readVInt();
-            if ((deltaCode & 1) != 0) {
-              offsetLength = posIn.readVInt();
-            }
-            offsetStartDeltaBuffer[i] = deltaCode >>> 1;
-            offsetLengthBuffer[i] = offsetLength;
-          }
-        }
-        payloadByteUpto = 0;
+        refillLastPositionBlock();
       } else {
         pforUtil.decode(posInUtil, posDeltaBuffer);
 
@@ -1054,8 +1079,7 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
             // this works, because when writing a vint block we always force the first length to be
             // written
             PForUtil.skip(payIn); // skip over lengths
-            int numBytes = payIn.readVInt(); // read length of payloadBytes
-            payIn.seek(payIn.getFilePointer() + numBytes); // skip over payloadBytes
+            payIn.skipBytes(payIn.readVInt()); // skip over payloadBytes
           }
           payloadByteUpto = 0;
         }
@@ -1074,13 +1098,40 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
       }
     }
 
+    private void accumulatePayloadAndOffsets() {
+      if (needsPayloads) {
+        payloadLength = payloadLengthBuffer[posBufferUpto];
+        payload.bytes = payloadBytes; // no copy: payload points into the shared buffer
+        payload.offset = payloadByteUpto;
+        payload.length = payloadLength;
+        payloadByteUpto += payloadLength; // move past this payload for the next position
+      }
+
+      if (needsOffsets) {
+        startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto]; // delta-decoded
+        endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
+        lastStartOffset = startOffset;
+      }
+    }
+
     @Override
     public int nextPosition() throws IOException {
-      assert posPendingCount > 0;
+      if (posDocBufferUpto != docBufferUpto) {
+        int freq = freq(); // triggers lazy decoding of freqs
 
-      if (posPendingCount > freq) {
-        skipPositions();
-        posPendingCount = freq;
+        // First position that is being read on this doc.
+        posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, docBufferUpto);
+        posDocBufferUpto = docBufferUpto;
+
+        assert posPendingCount > 0;
+
+        if (posPendingCount > freq) {
+          skipPositions(freq);
+          posPendingCount = freq;
+        }
+
+        position = 0;
+        lastStartOffset = 0;
       }
 
       if (posBufferUpto == BLOCK_SIZE) {
@@ -1089,18 +1140,8 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
       }
       position += posDeltaBuffer[posBufferUpto];
 
-      if (indexHasPayloads) {
-        payloadLength = payloadLengthBuffer[posBufferUpto];
-        payload.bytes = payloadBytes;
-        payload.offset = payloadByteUpto;
-        payload.length = payloadLength;
-        payloadByteUpto += payloadLength;
-      }
-
-      if (indexHasOffsets) {
-        startOffset = lastStartOffset + offsetStartDeltaBuffer[posBufferUpto];
-        endOffset = startOffset + offsetLengthBuffer[posBufferUpto];
-        lastStartOffset = startOffset;
+      if (needsPayloadsOrOffsets) {
+        accumulatePayloadAndOffsets();
       }
 
       posBufferUpto++;
@@ -1110,17 +1151,23 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
 
     @Override
     public int startOffset() {
+      if (needsOffsets == false) {
+        return -1;
+      }
       return startOffset;
     }
 
     @Override
     public int endOffset() {
+      if (needsOffsets == false) {
+        return -1;
+      }
       return endOffset;
     }
 
     @Override
     public BytesRef getPayload() {
-      if (payloadLength == 0) {
+      if (needsPayloads == false || payloadLength == 0) {
         return null;
       } else {
         return payload;
@@ -1466,9 +1513,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
     final boolean indexHasPayloads;
     final boolean indexHasOffsetsOrPayloads;
 
-    private int freq; // freq we last read
+    private long freqFP; // offset of the freq block
+
     private int position; // current position
 
+    // value of docBufferUpto on the last doc ID when positions have been read
+    private int posDocBufferUpto;
+
     // how many positions "behind" we are; nextPosition must
     // skip these to "catch up":
     private int posPendingCount;
@@ -1516,8 +1567,13 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
     }
 
     @Override
-    public int freq() {
-      return freq;
+    public int freq() throws IOException {
+      if (freqFP != -1) { // -1 means freqBuffer is already populated for this block
+        docIn.seek(freqFP);
+        pforUtil.decode(docInUtil, freqBuffer);
+        freqFP = -1; // decode at most once per block
+      }
+      return freqBuffer[docBufferUpto - 1]; // docBufferUpto points one past the current doc
     }
 
     private void refillDocs() throws IOException {
@@ -1526,24 +1582,28 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
 
       if (left >= BLOCK_SIZE) {
         forDeltaUtil.decodeAndPrefixSum(docInUtil, prevDocID, docBuffer);
-        pforUtil.decode(docInUtil, freqBuffer);
+        freqFP = docIn.getFilePointer(); // remember where freqs start, freq() decodes them lazily
+        PForUtil.skip(docIn);
         docCountUpto += BLOCK_SIZE;
       } else if (docFreq == 1) {
         docBuffer[0] = singletonDocID;
         freqBuffer[0] = (int) totalTermFreq;
+        freqFP = -1; // freqBuffer is already populated, nothing to decode lazily
         docBuffer[1] = NO_MORE_DOCS;
         docCountUpto++;
         docBufferSize = 1;
       } else {
         // Read vInts:
         PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
         prefixSum(docBuffer, left, prevDocID);
         docBuffer[left] = NO_MORE_DOCS;
+        freqFP = -1; // freqBuffer is already populated, nothing to decode lazily
         docCountUpto += left;
         docBufferSize = left;
       }
       prevDocID = docBuffer[BLOCK_SIZE - 1];
       docBufferUpto = 0;
+      posDocBufferUpto = 0;
       assert docBuffer[docBufferSize] == NO_MORE_DOCS;
     }
 
@@ -1585,20 +1647,14 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
     }
 
     private void skipLevel0To(int target) throws IOException {
+      long posFP;
+      int posUpto;
+
       while (true) {
         prevDocID = level0LastDocID;
 
-        // If nextBlockPosFP is less than the current FP, it means that the block of positions for
-        // the first docs of the next block are already decoded. In this case we just accumulate
-        // frequencies into posPendingCount instead of seeking backwards and decoding the same pos
-        // block again.
-        if (level0PosEndFP >= posIn.getFilePointer()) {
-          posIn.seek(level0PosEndFP);
-          posPendingCount = level0BlockPosUpto;
-          posBufferUpto = BLOCK_SIZE;
-        } else {
-          posPendingCount += sumOverRange(freqBuffer, docBufferUpto, BLOCK_SIZE);
-        }
+        posFP = level0PosEndFP;
+        posUpto = level0BlockPosUpto;
 
         if (docFreq - docCountUpto >= BLOCK_SIZE) {
           docIn.readVLong(); // skip0 num bytes
@@ -1631,6 +1687,18 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
           break;
         }
       }
+
+      // If posFP is less than the current FP, it means that the block of positions for the
+      // first docs of the next block is already decoded. In this case we just accumulate
+      // frequencies into posPendingCount instead of seeking backwards and decoding the same pos
+      // block again.
+      if (posFP >= posIn.getFilePointer()) {
+        posIn.seek(posFP);
+        posPendingCount = posUpto;
+        posBufferUpto = BLOCK_SIZE;
+      } else {
+        posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, BLOCK_SIZE);
+      }
     }
 
     @Override
@@ -1660,30 +1728,25 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
       }
 
       doc = docBuffer[docBufferUpto];
-      freq = freqBuffer[docBufferUpto];
-      posPendingCount += freq;
       docBufferUpto++;
-      position = 0;
       return this.doc;
     }
 
     @Override
     public int advance(int target) throws IOException {
-      advanceShallow(target);
-      if (needsRefilling) {
+      if (target > level0LastDocID || needsRefilling) {
+        advanceShallow(target);
+        assert needsRefilling;
         refillDocs();
         needsRefilling = false;
       }
 
       int next = VectorUtil.findNextGEQ(docBuffer, target, docBufferUpto, docBufferSize);
-      posPendingCount += sumOverRange(freqBuffer, docBufferUpto, next + 1);
-      freq = freqBuffer[next];
       docBufferUpto = next + 1;
-      position = 0;
       return this.doc = docBuffer[next];
     }
 
-    private void skipPositions() throws IOException {
+    private void skipPositions(int freq) throws IOException {
       // Skip positions now:
       int toSkip = posPendingCount - freq;
       // if (DEBUG) {
@@ -1703,8 +1766,6 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
         refillPositions();
         posBufferUpto = toSkip;
       }
-
-      position = 0;
     }
 
     private void refillPositions() throws IOException {
@@ -1739,11 +1800,21 @@ public final class Lucene101PostingsReader extends PostingsReaderBase {
 
     @Override
     public int nextPosition() throws IOException {
-      assert posPendingCount > 0;
+      if (posDocBufferUpto != docBufferUpto) {
+        int freq = freq(); // triggers lazy decoding of freqs
 
-      if (posPendingCount > freq) {
-        skipPositions();
-        posPendingCount = freq;
+        // First position that is being read on this doc.
+        posPendingCount += sumOverRange(freqBuffer, posDocBufferUpto, docBufferUpto);
+        posDocBufferUpto = docBufferUpto;
+
+        assert posPendingCount > 0;
+
+        if (posPendingCount > freq) {
+          skipPositions(freq);
+          posPendingCount = freq;
+        }
+
+        position = 0;
       }
 
       if (posBufferUpto == BLOCK_SIZE) {