mirror of https://github.com/apache/lucene.git
Speed up advancing within a block. (#13692)
Advancing within a block consists of finding the first index within an array of 128 values whose value is greater than or equal to a target. Given the small size, it's not obvious whether it's better to perform a linear search, a binary search or something else... It is surprisingly hard to beat the linear search that we are using today. Experiments suggested that the following approach works in practice: - First check if the next item in the array is greater than or equal to the target. - Then find the first 4-value interval that contains our target. - Then perform a branchless binary search within this interval of 4 values. This approach still biases heavily towards the case when the target is very close to the current index, only a bit less than a linear search.
This commit is contained in:
parent
d55b92bae0
commit
79fd9fee97
|
@ -0,0 +1,376 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.benchmark.jmh;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.CompilerControl;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
@State(Scope.Benchmark)
|
||||
@Warmup(iterations = 5, time = 1)
|
||||
@Measurement(iterations = 5, time = 1)
|
||||
@Fork(
|
||||
value = 1,
|
||||
jvmArgsAppend = {"-Xmx1g", "-Xms1g", "-XX:+AlwaysPreTouch"})
|
||||
public class AdvanceBenchmark {
|
||||
|
||||
private final long[] values = new long[129];
|
||||
private final int[] startIndexes = new int[1_000];
|
||||
private final long[] targets = new long[startIndexes.length];
|
||||
|
||||
@Setup(Level.Trial)
|
||||
public void setup() throws Exception {
|
||||
for (int i = 0; i < 128; ++i) {
|
||||
values[i] = i;
|
||||
}
|
||||
values[128] = DocIdSetIterator.NO_MORE_DOCS;
|
||||
Random r = new Random(0);
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
startIndexes[i] = r.nextInt(64);
|
||||
targets[i] = startIndexes[i] + 1 + r.nextInt(1 << r.nextInt(7));
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void binarySearch() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
binarySearch(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int binarySearch(long[] values, long target, int startIndex) {
|
||||
// Standard binary search
|
||||
int i = Arrays.binarySearch(values, startIndex, values.length, target);
|
||||
if (i < 0) {
|
||||
i = -1 - i;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void binarySearch2() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
binarySearch2(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int binarySearch2(long[] values, long target, int startIndex) {
|
||||
// Try to help the compiler by providing predictable start/end offsets.
|
||||
int i = Arrays.binarySearch(values, 0, 128, target);
|
||||
if (i < 0) {
|
||||
i = -1 - i;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void binarySearch3() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
binarySearch3(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int binarySearch3(long[] values, long target, int startIndex) {
|
||||
// Organize code the same way as suggested in https://quickwit.io/blog/search-a-sorted-block,
|
||||
// which proved to help with LLVM.
|
||||
int start = 0;
|
||||
int length = 128;
|
||||
|
||||
while (length > 1) {
|
||||
length /= 2;
|
||||
if (values[start + length - 1] < target) {
|
||||
start += length;
|
||||
}
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void binarySearch4() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
binarySearch4(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int binarySearch4(long[] values, long target, int startIndex) {
|
||||
// Explicitly inline the binary-search logic to see if it helps the compiler.
|
||||
int start = 0;
|
||||
|
||||
if (values[63] < target) {
|
||||
start += 64;
|
||||
}
|
||||
if (values[start + 31] < target) {
|
||||
start += 32;
|
||||
}
|
||||
if (values[start + 15] < target) {
|
||||
start += 16;
|
||||
}
|
||||
if (values[start + 7] < target) {
|
||||
start += 8;
|
||||
}
|
||||
if (values[start + 3] < target) {
|
||||
start += 4;
|
||||
}
|
||||
if (values[start + 1] < target) {
|
||||
start += 2;
|
||||
}
|
||||
if (values[start] < target) {
|
||||
start += 1;
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void binarySearch5() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
binarySearch5(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int binarySearch5(long[] values, long target, int startIndex) {
|
||||
// Other way to write a binary search
|
||||
int start = 0;
|
||||
|
||||
for (int shift = 6; shift >= 0; --shift) {
|
||||
int halfRange = 1 << shift;
|
||||
if (values[start + halfRange - 1] < target) {
|
||||
start += halfRange;
|
||||
}
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void binarySearch6() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
binarySearch6(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int binarySearch6(long[] values, long target, int startIndex) {
|
||||
// Other way to write a binary search
|
||||
int start = 0;
|
||||
|
||||
for (int halfRange = 64; halfRange > 0; halfRange >>= 1) {
|
||||
if (values[start + halfRange - 1] < target) {
|
||||
start += halfRange;
|
||||
}
|
||||
}
|
||||
|
||||
return start;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void linearSearch() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
linearSearch(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int linearSearch(long[] values, long target, int startIndex) {
|
||||
// Naive linear search.
|
||||
for (int i = startIndex; i < values.length; ++i) {
|
||||
if (values[i] >= target) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void bruteForceSearch() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
bruteForceSearch(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int bruteForceSearch(long[] values, long target, int startIndex) {
|
||||
// Linear search with predictable start/end offsets to see if it helps the compiler.
|
||||
for (int i = 0; i < 128; ++i) {
|
||||
if (values[i] >= target) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void linearSearch2() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
linearSearch2(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int linearSearch2(long[] values, long target, int startIndex) {
|
||||
// Two-level linear search, first checking every 8-th value, then values within an 8-value range
|
||||
int rangeStart = values.length - 8;
|
||||
|
||||
for (int i = startIndex; i + 8 <= values.length; i += 8) {
|
||||
if (values[i + 7] >= target) {
|
||||
rangeStart = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
if (values[rangeStart + i] >= target) {
|
||||
return rangeStart + i;
|
||||
}
|
||||
}
|
||||
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void linearSearch3() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
linearSearch3(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int linearSearch3(long[] values, long target, int startIndex) {
|
||||
// Iteration over linearSearch that tries to reduce branches
|
||||
while (startIndex + 4 <= values.length) {
|
||||
int count = values[startIndex] < target ? 1 : 0;
|
||||
if (values[startIndex + 1] < target) {
|
||||
count++;
|
||||
}
|
||||
if (values[startIndex + 2] < target) {
|
||||
count++;
|
||||
}
|
||||
if (values[startIndex + 3] < target) {
|
||||
count++;
|
||||
}
|
||||
if (count != 4) {
|
||||
return startIndex + count;
|
||||
}
|
||||
startIndex += 4;
|
||||
}
|
||||
|
||||
for (int i = startIndex; i < values.length; ++i) {
|
||||
if (values[i] >= target) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return values.length;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public void hybridSearch() {
|
||||
for (int i = 0; i < startIndexes.length; ++i) {
|
||||
hybridSearch(values, targets[i], startIndexes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
|
||||
private static int hybridSearch(long[] values, long target, int startIndex) {
|
||||
// Two-level linear search, first checking every 8-th value, then values within an 8-value range
|
||||
int rangeStart = values.length - 8;
|
||||
|
||||
for (int i = startIndex; i + 8 <= values.length; i += 8) {
|
||||
if (values[i + 7] >= target) {
|
||||
rangeStart = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return binarySearchHelper8(values, target, rangeStart);
|
||||
}
|
||||
|
||||
// branchless binary search over 8 values
|
||||
private static int binarySearchHelper8(long[] values, long target, int start) {
|
||||
if (values[start + 3] < target) {
|
||||
start += 4;
|
||||
}
|
||||
if (values[start + 1] < target) {
|
||||
start += 2;
|
||||
}
|
||||
if (values[start] < target) {
|
||||
start += 1;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
private static void assertEquals(int expected, int actual) {
|
||||
if (expected != actual) {
|
||||
throw new AssertionError("Expected: " + expected + ", got " + actual);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
// For testing purposes
|
||||
long[] values = new long[129];
|
||||
for (int i = 0; i < 128; ++i) {
|
||||
values[i] = i;
|
||||
}
|
||||
values[128] = DocIdSetIterator.NO_MORE_DOCS;
|
||||
for (int start = 0; start < 128; ++start) {
|
||||
for (int targetIndex = start; targetIndex < 128; ++targetIndex) {
|
||||
int actualIndex = binarySearch(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = binarySearch2(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = binarySearch3(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = binarySearch4(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = binarySearch5(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = binarySearch6(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = bruteForceSearch(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = hybridSearch(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = linearSearch(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = linearSearch2(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
actualIndex = linearSearch3(values, values[targetIndex], start);
|
||||
assertEquals(targetIndex, actualIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -67,6 +67,7 @@ import org.apache.lucene.util.IOUtils;
|
|||
public final class Lucene912PostingsReader extends PostingsReaderBase {
|
||||
|
||||
static final VectorizationProvider VECTORIZATION_PROVIDER = VectorizationProvider.getInstance();
|
||||
static int BINARY_SEARCH_WINDOW_SIZE = 4;
|
||||
|
||||
private final IndexInput docIn;
|
||||
private final IndexInput posIn;
|
||||
|
@ -212,13 +213,74 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
static int findFirstGreater(long[] buffer, int target, int from) {
|
||||
for (int i = from; i < BLOCK_SIZE; ++i) {
|
||||
if (buffer[i] >= target) {
|
||||
return i;
|
||||
private static boolean assertDocBuffer(long[] values, int start) {
|
||||
assert values.length == BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE;
|
||||
assert values[BLOCK_SIZE] == DocIdSetIterator.NO_MORE_DOCS;
|
||||
assert start < BLOCK_SIZE;
|
||||
|
||||
int endOffset;
|
||||
if (values[0] == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
endOffset = 0;
|
||||
} else {
|
||||
endOffset = -1;
|
||||
for (int i = 1; i < values.length; ++i) {
|
||||
assert values[i] > values[i - 1] : Arrays.toString(values);
|
||||
if (values[i] == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
endOffset = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return BLOCK_SIZE;
|
||||
for (int i = 0; i < BINARY_SEARCH_WINDOW_SIZE; ++i) {
|
||||
assert values[endOffset + i] == DocIdSetIterator.NO_MORE_DOCS;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the first index in sorted array {@code values} whose value is greater than or equal to
|
||||
* {@code target}. For correctness, it requires the last 4 values to be set to {@code
|
||||
* NO_MORE_DOCS}.
|
||||
*/
|
||||
static int findNextGEQ(long[] values, long target, int start) {
|
||||
assert assertDocBuffer(values, start);
|
||||
|
||||
if (values[start] >= target) {
|
||||
// Surprisingly this is a likely condition in practice, so optimizing for it helps.
|
||||
return start;
|
||||
}
|
||||
|
||||
// We just ruled out that our target index is at `start`.
|
||||
start += 1;
|
||||
|
||||
// Now find the first interval of 4 values that contains our target.
|
||||
for (int i = start;
|
||||
i + BINARY_SEARCH_WINDOW_SIZE <= values.length;
|
||||
i += BINARY_SEARCH_WINDOW_SIZE) {
|
||||
if (values[i + BINARY_SEARCH_WINDOW_SIZE - 1] >= target) {
|
||||
start = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Binary search in this interval of 4 values.
|
||||
return binarySearch4(values, target, start);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the first index whose value is greater than or equal to {@code target} among the 4
|
||||
* values starting at {@code start}. If none of the values is greater than or equal to {@code
|
||||
* target}, this returns {@code start+3}.
|
||||
*/
|
||||
private static int binarySearch4(long[] values, long target, int start) {
|
||||
// This code is organized in a way that compiles to a branchless binary search.
|
||||
if (values[start + 1] < target) {
|
||||
start += 2;
|
||||
}
|
||||
if (values[start] < target) {
|
||||
start += 1;
|
||||
}
|
||||
return start;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -348,7 +410,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
|
||||
private int docBufferUpto;
|
||||
|
@ -390,9 +452,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
|
||||
>= 0
|
||||
|| fieldInfo.hasPayloads();
|
||||
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// advance()
|
||||
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
}
|
||||
|
||||
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
|
||||
|
@ -498,7 +560,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
docCountUpto += BLOCK_SIZE;
|
||||
prevDocID = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
assert assertDocBuffer(docBuffer, 0);
|
||||
}
|
||||
|
||||
private void refillRemainder() throws IOException {
|
||||
|
@ -509,15 +571,14 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = totalTermFreq;
|
||||
docBuffer[1] = NO_MORE_DOCS;
|
||||
docCountUpto++;
|
||||
} else {
|
||||
// Read vInts:
|
||||
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, needsFreq);
|
||||
prefixSum(docBuffer, left, prevDocID);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
docCountUpto += left;
|
||||
}
|
||||
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
docBufferUpto = 0;
|
||||
freqFP = -1;
|
||||
}
|
||||
|
@ -615,7 +676,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
int next = findFirstGreater(docBuffer, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto);
|
||||
this.doc = (int) docBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
|
@ -633,8 +694,8 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
|
||||
|
||||
private final long[] payloadLengthBuffer;
|
||||
|
@ -755,9 +816,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
payload = null;
|
||||
}
|
||||
|
||||
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// advance()
|
||||
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
}
|
||||
|
||||
public boolean canReuse(IndexInput docIn, FieldInfo fieldInfo) {
|
||||
|
@ -849,18 +910,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
} else if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = totalTermFreq;
|
||||
docBuffer[1] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, 1, 1 + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
docCountUpto++;
|
||||
} else {
|
||||
// Read vInts:
|
||||
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
|
||||
prefixSum(docBuffer, left, prevDocID);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
docCountUpto += left;
|
||||
}
|
||||
prevDocID = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
assert assertDocBuffer(docBuffer, 0);
|
||||
}
|
||||
|
||||
private void skipLevel1To(int target) throws IOException {
|
||||
|
@ -1022,7 +1083,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
refillDocs();
|
||||
}
|
||||
|
||||
int next = findFirstGreater(docBuffer, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto);
|
||||
for (int i = docBufferUpto; i <= next; ++i) {
|
||||
posPendingCount += freqBuffer[i];
|
||||
}
|
||||
|
@ -1223,7 +1284,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
|
||||
private int docBufferUpto;
|
||||
|
@ -1271,9 +1332,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
|
||||
>= 0
|
||||
|| fieldInfo.hasPayloads();
|
||||
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// advance()
|
||||
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
|
||||
docFreq = termState.docFreq;
|
||||
if (docFreq > 1) {
|
||||
|
@ -1363,13 +1424,13 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
// Read vInts:
|
||||
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
|
||||
prefixSum(docBuffer, left, prevDocID);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
freqFP = -1;
|
||||
docCountUpto += left;
|
||||
}
|
||||
prevDocID = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
assert assertDocBuffer(docBuffer, 0);
|
||||
}
|
||||
|
||||
private void skipLevel1To(int target) throws IOException {
|
||||
|
@ -1495,7 +1556,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
needsRefilling = false;
|
||||
}
|
||||
|
||||
int next = findFirstGreater(docBuffer, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto);
|
||||
this.doc = (int) docBuffer[next];
|
||||
docBufferUpto = next + 1;
|
||||
return doc;
|
||||
|
@ -1578,7 +1639,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
final ForDeltaUtil forDeltaUtil = new ForDeltaUtil();
|
||||
final PForUtil pforUtil = new PForUtil(forUtil);
|
||||
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + 1];
|
||||
private final long[] docBuffer = new long[BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE];
|
||||
private final long[] freqBuffer = new long[BLOCK_SIZE];
|
||||
private final long[] posDeltaBuffer = new long[BLOCK_SIZE];
|
||||
|
||||
|
@ -1658,9 +1719,9 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
this.posIn = Lucene912PostingsReader.this.posIn.clone();
|
||||
posInUtil = VECTORIZATION_PROVIDER.newPostingDecodingUtil(posIn);
|
||||
|
||||
// We set the last element of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// We set the last elements of docBuffer to NO_MORE_DOCS, it helps save conditionals in
|
||||
// advance()
|
||||
docBuffer[BLOCK_SIZE] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, BLOCK_SIZE, BLOCK_SIZE + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
|
||||
docFreq = termState.docFreq;
|
||||
posTermStartFP = termState.posStartFP;
|
||||
|
@ -1730,18 +1791,18 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
} else if (docFreq == 1) {
|
||||
docBuffer[0] = singletonDocID;
|
||||
freqBuffer[0] = totalTermFreq;
|
||||
docBuffer[1] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
docCountUpto++;
|
||||
} else {
|
||||
// Read vInts:
|
||||
PostingsUtil.readVIntBlock(docIn, docBuffer, freqBuffer, left, indexHasFreq, true);
|
||||
prefixSum(docBuffer, left, prevDocID);
|
||||
docBuffer[left] = NO_MORE_DOCS;
|
||||
Arrays.fill(docBuffer, left, left + BINARY_SEARCH_WINDOW_SIZE, NO_MORE_DOCS);
|
||||
docCountUpto += left;
|
||||
}
|
||||
prevDocID = docBuffer[BLOCK_SIZE - 1];
|
||||
docBufferUpto = 0;
|
||||
assert docBuffer[BLOCK_SIZE] == NO_MORE_DOCS;
|
||||
assert assertDocBuffer(docBuffer, 0);
|
||||
}
|
||||
|
||||
private void skipLevel1To(int target) throws IOException {
|
||||
|
@ -1938,7 +1999,7 @@ public final class Lucene912PostingsReader extends PostingsReaderBase {
|
|||
needsRefilling = false;
|
||||
}
|
||||
|
||||
int next = findFirstGreater(docBuffer, target, docBufferUpto);
|
||||
int next = findNextGEQ(docBuffer, target, docBufferUpto);
|
||||
for (int i = docBufferUpto; i <= next; ++i) {
|
||||
posPendingCount += freqBuffer[i];
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.index.DirectoryReader;
|
|||
import org.apache.lucene.index.Impact;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.ByteArrayDataInput;
|
||||
import org.apache.lucene.store.ByteArrayDataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -154,4 +155,19 @@ public class TestLucene912PostingsFormat extends BasePostingsFormatTestCase {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void testFindNextGEQ() {
|
||||
long[] values =
|
||||
new long[ForUtil.BLOCK_SIZE + Lucene912PostingsReader.BINARY_SEARCH_WINDOW_SIZE];
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
values[i] = i * 2;
|
||||
}
|
||||
Arrays.fill(values, ForUtil.BLOCK_SIZE, values.length, DocIdSetIterator.NO_MORE_DOCS);
|
||||
for (int i = 0; i < ForUtil.BLOCK_SIZE; ++i) {
|
||||
for (int start = 0; start <= i; ++start) {
|
||||
assertEquals(i, Lucene912PostingsReader.findNextGEQ(values, i * 2, start));
|
||||
assertEquals(i + 1, Lucene912PostingsReader.findNextGEQ(values, i * 2 + 1, start));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue