LUCENE-9932: Performance improvement for BKD index building (#91)

This commit is contained in:
neoReMinD 2021-05-14 15:33:43 +08:00 committed by GitHub
parent f215a55bc9
commit fd4b3c81d5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 434 additions and 59 deletions

View File

@ -368,6 +368,8 @@ Improvements
Optimizations
---------------------
* LUCENE-9932: Performance improvement for BKD index building (neoremind)
* LUCENE-9827: Speed up merging of stored fields and term vectors for smaller segments.
(Daniel Mitterdorfer, Dimitrios Liapis, Adrien Grand, Robert Muir)

View File

@ -41,4 +41,10 @@ public abstract class MutablePointValues extends PointValues {
/** Swap the i-th and j-th values. */
public abstract void swap(int i, int j);

/** Save the i-th value into the j-th position in temporary storage. */
public abstract void save(int i, int j);

/**
 * Restore values in positions {@code [i, j)} ({@code i} inclusive, {@code j} exclusive) from
 * temporary storage back into original storage.
 */
public abstract void restore(int i, int j);
}

View File

@ -89,6 +89,7 @@ class PointValuesWriter {
PointValues points =
new MutablePointValues() {
final int[] ords = new int[numPoints];
int[] temp;
{
for (int i = 0; i < numPoints; ++i) {
@ -172,6 +173,21 @@ class PointValuesWriter {
final long offset = (long) packedBytesLength * ords[i] + k;
return bytes.readByte(offset);
}
@Override
public void save(int i, int j) {
  // Lazily allocate the scratch ord buffer the first time a value is saved.
  int[] scratch = temp;
  if (scratch == null) {
    scratch = new int[ords.length];
    temp = scratch;
  }
  // Stash the ord of slot i into slot j of the scratch buffer.
  scratch[j] = ords[i];
}
@Override
public void restore(int i, int j) {
  // Nothing buffered: save() was never invoked, so there is nothing to move back.
  if (temp == null) {
    return;
  }
  // Copy the buffered ords for positions [i, j) back into place.
  System.arraycopy(temp, i, ords, i, j - i);
}
};
final PointValues values;
@ -291,5 +307,15 @@ class PointValuesWriter {
// Delegate swapping of the i-th and j-th values to the wrapped MutablePointValues.
public void swap(int i, int j) {
in.swap(i, j);
}

// Delegate saving the i-th value into the j-th temporary slot to the wrapped instance.
@Override
public void save(int i, int j) {
in.save(i, j);
}

// Delegate restoring positions [i, j) from temporary storage to the wrapped instance.
@Override
public void restore(int i, int j) {
in.restore(i, j);
}
}
}

View File

@ -33,7 +33,7 @@ public abstract class MSBRadixSorter extends Sorter {
// locality)
private static final int LEVEL_THRESHOLD = 8;
// size of histograms: 256 + 1 to indicate that the string is finished
private static final int HISTOGRAM_SIZE = 257;
protected static final int HISTOGRAM_SIZE = 257;
// buckets below this size will be sorted with introsort
private static final int LENGTH_THRESHOLD = 100;
@ -42,7 +42,7 @@ public abstract class MSBRadixSorter extends Sorter {
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
private final int[] commonPrefix;
private final int maxLength;
protected final int maxLength;
/**
* Sole constructor.
@ -128,7 +128,7 @@ public abstract class MSBRadixSorter extends Sorter {
sort(from, to, 0, 0);
}
private void sort(int from, int to, int k, int l) {
protected void sort(int from, int to, int k, int l) {
if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
introSort(from, to, k);
} else {
@ -202,7 +202,7 @@ public abstract class MSBRadixSorter extends Sorter {
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int i, int k) {
protected int getBucket(int i, int k) {
return byteAt(i, k) + 1;
}
@ -284,7 +284,7 @@ public abstract class MSBRadixSorter extends Sorter {
* @param startOffsets start offsets per bucket
* @param endOffsets end offsets per bucket
*/
private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
protected void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
// reorder in place, like the dutch flag problem
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
final int limit = endOffsets[i];

View File

@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
/**
* Stable radix sorter for variable-length strings.
*
* @lucene.internal
*/
/**
 * Stable radix sorter for variable-length strings.
 *
 * <p>Unlike {@code MSBRadixSorter}, equal values keep their relative order: the in-place
 * Dutch-flag reorder is replaced by a counting-sort style pass that buffers elements through
 * {@link #save(int, int)} / {@link #restore(int, int)}, and the fallback sorter is a stable
 * {@link InPlaceMergeSorter}.
 *
 * @lucene.internal
 */
public abstract class StableMSBRadixSorter extends MSBRadixSorter {

  /** Snapshot of per-bucket start offsets, taken before reorder() consumes startOffsets. */
  private final int[] fixedStartOffsets;

  public StableMSBRadixSorter(int maxLength) {
    super(maxLength);
    fixedStartOffsets = new int[HISTOGRAM_SIZE];
  }

  /** Save the i-th value into the j-th position in temporary storage. */
  protected abstract void save(int i, int j);

  /**
   * Restore values in positions {@code [i, j)} ({@code j} exclusive) from temporary storage back
   * into original storage.
   */
  protected abstract void restore(int i, int j);

  @Override
  protected Sorter getFallbackSorter(int k) {
    // InPlaceMergeSorter is stable, which is required to preserve the order of equal values.
    return new InPlaceMergeSorter() {
      @Override
      protected void swap(int i, int j) {
        StableMSBRadixSorter.this.swap(i, j);
      }

      @Override
      protected int compare(int i, int j) {
        // Compare byte-by-byte starting at position k. byteAt returns -1 once a value is
        // exhausted, so a shorter value sorts before any longer value it is a prefix of.
        int pos = k;
        while (pos < maxLength) {
          final int left = byteAt(i, pos);
          final int right = byteAt(j, pos);
          if (left != right) {
            return left - right;
          }
          if (left == -1) {
            break; // both values ended at the same position: equal
          }
          ++pos;
        }
        return 0;
      }
    };
  }

  /**
   * Reorder elements in stable way, since Dutch sort does not guarantee ordering for same values.
   *
   * <p>When this method returns, startOffsets and endOffsets are equal.
   */
  @Override
  protected void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
    // Remember where each bucket starts before startOffsets is advanced below.
    System.arraycopy(startOffsets, 0, fixedStartOffsets, 0, startOffsets.length);
    for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) {
      final int end = endOffsets[bucket];
      // Scanning the range left-to-right keeps equal values in their original relative order.
      for (int slot = fixedStartOffsets[bucket]; slot < end; ++slot) {
        final int dest = startOffsets[getBucket(from + slot, k)]++;
        save(from + slot, from + dest);
      }
    }
    // Move everything back from temporary storage in a single pass.
    restore(from, to);
  }
}

View File

@ -21,9 +21,9 @@ import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.MSBRadixSorter;
import org.apache.lucene.util.RadixSelector;
import org.apache.lucene.util.Selector;
import org.apache.lucene.util.StableMSBRadixSorter;
import org.apache.lucene.util.packed.PackedInts;
/**
@ -38,14 +38,39 @@ public final class MutablePointsReaderUtils {
/** Sort the given {@link MutablePointValues} based on its packed value then doc ID. */
public static void sort(
BKDConfig config, int maxDoc, MutablePointValues reader, int from, int to) {
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new MSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
boolean sortedByDocID = true;
int prevDoc = 0;
for (int i = from; i < to; ++i) {
int doc = reader.getDocID(i);
if (doc < prevDoc) {
sortedByDocID = false;
break;
}
prevDoc = doc;
}
// No need to tie break on doc IDs if already sorted by doc ID, since we use a stable sort.
// This should be a common situation as IndexWriter accumulates data in doc ID order when
// index sorting is not enabled.
final int bitsPerDocId = sortedByDocID ? 0 : PackedInts.bitsRequired(maxDoc - 1);
new StableMSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
@Override
protected void swap(int i, int j) {
// Delegate element movement to the reader, which owns the actual storage.
reader.swap(i, j);
}

@Override
protected void save(int i, int j) {
// Buffer the i-th value into slot j of the reader's temporary storage.
reader.save(i, j);
}

@Override
protected void restore(int i, int j) {
// Move buffered values for positions [i, j) back into the reader's primary storage.
reader.restore(i, j);
}
@Override
protected int byteAt(int i, int k) {
if (k < config.packedBytesLength) {
@ -55,46 +80,6 @@ public final class MutablePointsReaderUtils {
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
@Override
protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
return new IntroSorter() {
final BytesRef pivot = new BytesRef();
final BytesRef scratch = new BytesRef();
int pivotDoc;
@Override
protected void swap(int i, int j) {
reader.swap(i, j);
}
@Override
protected void setPivot(int i) {
reader.getValue(i, pivot);
pivotDoc = reader.getDocID(i);
}
@Override
protected int comparePivot(int j) {
if (k < config.packedBytesLength) {
reader.getValue(j, scratch);
int cmp =
Arrays.compareUnsigned(
pivot.bytes,
pivot.offset + k,
pivot.offset + k + config.packedBytesLength - k,
scratch.bytes,
scratch.offset + k,
scratch.offset + k + config.packedBytesLength - k);
if (cmp != 0) {
return cmp;
}
}
return pivotDoc - reader.getDocID(j);
}
};
}
}.sort(from, to);
}

View File

@ -0,0 +1,211 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
/** Tests for {@code StableMSBRadixSorter}: verifies correct ordering and stability. */
public class TestStableMSBRadixSorter extends LuceneTestCase {

// Sorts the first len entries of refs with StableMSBRadixSorter and compares against
// Arrays#sort (stable for objects), including instance-level identity checks.
private void test(BytesRef[] refs, int len) {
BytesRef[] expected = ArrayUtil.copyOfSubArray(refs, 0, len);
Arrays.sort(expected);
// Compute the true maximum length of the values under test.
int maxLength = 0;
for (int i = 0; i < len; ++i) {
BytesRef ref = refs[i];
maxLength = Math.max(maxLength, ref.length);
}
// Randomly loosen the bound so the sorter is also exercised with a non-tight maxLength.
switch (random().nextInt(3)) {
case 0:
maxLength += TestUtil.nextInt(random(), 1, 5);
break;
case 1:
maxLength = Integer.MAX_VALUE;
break;
default:
// leave unchanged
break;
}
final int finalMaxLength = maxLength;
new StableMSBRadixSorter(maxLength) {
// Scratch buffer backing save()/restore(); allocated lazily on first save.
private BytesRef[] temp;

@Override
protected int byteAt(int i, int k) {
// The sorter must never read past the maxLength it was constructed with.
assertTrue(k < finalMaxLength);
BytesRef ref = refs[i];
if (ref.length <= k) {
// -1 signals end-of-string, matching the byteAt contract used by the sorter.
return -1;
}
return ref.bytes[ref.offset + k] & 0xff;
}

@Override
protected void swap(int i, int j) {
BytesRef tmp = refs[i];
refs[i] = refs[j];
refs[j] = tmp;
}

@Override
protected void save(int i, int j) {
if (temp == null) {
temp = new BytesRef[refs.length];
}
temp[j] = refs[i];
}

@Override
protected void restore(int i, int j) {
if (temp != null) {
// Copy positions [i, j) from the scratch buffer back into refs.
System.arraycopy(temp, i, refs, i, j - i);
}
}
}.sort(0, len);
BytesRef[] actual = ArrayUtil.copyOfSubArray(refs, 0, len);
assertArrayEquals(expected, actual);
// Verify that the arrays are not only equal after sorting with Arrays#sort and
// StableMSBRadixSorter
// but also that they have the very same instance at every index.
// This is different from MSBRadixSorter which does not guarantee ordering of the same value.
assertEquals(expected.length, actual.length);
for (int i = 0; i < expected.length; i++) {
assertSame(expected[i].bytes, actual[i].bytes);
}
}

public void testEmpty() {
// Array may have up to 4 (ignored) slots, but len is 0: nothing is sorted.
test(new BytesRef[random().nextInt(5)], 0);
}

public void testOneValue() {
BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random()));
test(new BytesRef[] {bytes}, 1);
}

public void testTwoValues() {
BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random()));
BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random()));
test(new BytesRef[] {bytes1, bytes2}, 2);
}

// Builds random values sharing a common prefix of commonPrefixLen bytes with up to
// (maxLen - 1) random suffix bytes. Trailing array slots beyond len stay null on purpose:
// only the first len entries participate in the sort.
private void testRandom(int commonPrefixLen, int maxLen) {
byte[] commonPrefix = new byte[commonPrefixLen];
random().nextBytes(commonPrefix);
final int len = random().nextInt(100000);
BytesRef[] bytes = new BytesRef[len + random().nextInt(50)];
for (int i = 0; i < len; ++i) {
byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)];
random().nextBytes(b);
System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen);
bytes[i] = new BytesRef(b);
}
test(bytes, len);
}

public void testRandom() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(0, 10);
}
}

public void testRandomWithLotsOfDuplicates() {
// Very short values (0 or 1 suffix bytes) yield many duplicates, stressing stability.
for (int iter = 0; iter < 10; ++iter) {
testRandom(0, 2);
}
}

public void testRandomWithSharedPrefix() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(TestUtil.nextInt(random(), 1, 30), 10);
}
}

public void testRandomWithSharedPrefixAndLotsOfDuplicates() {
for (int iter = 0; iter < 10; ++iter) {
testRandom(TestUtil.nextInt(random(), 1, 30), 2);
}
}

// Builds strings by concatenating 1-5 fragments drawn from a small randomly-biased fragment
// alphabet, which produces long shared prefixes and duplicate values.
public void testRandom2() {
// how large our alphabet is
int letterCount = TestUtil.nextInt(random(), 2, 10);
// how many substring fragments to use
int substringCount = TestUtil.nextInt(random(), 2, 10);
Set<BytesRef> substringsSet = new HashSet<>();
// how many strings to make
int stringCount = atLeast(10000);
// System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + "
// stringCount=" + stringCount);
while (substringsSet.size() < substringCount) {
int length = TestUtil.nextInt(random(), 2, 10);
byte[] bytes = new byte[length];
for (int i = 0; i < length; i++) {
bytes[i] = (byte) random().nextInt(letterCount);
}
BytesRef br = new BytesRef(bytes);
substringsSet.add(br);
// System.out.println("add substring count=" + substringsSet.size() + ": " + br);
}
BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
double[] chance = new double[substrings.length];
double sum = 0.0;
for (int i = 0; i < substrings.length; i++) {
chance[i] = random().nextDouble();
sum += chance[i];
}
// give each substring a random chance of occurring:
double accum = 0.0;
for (int i = 0; i < substrings.length; i++) {
accum += chance[i] / sum;
chance[i] = accum;
}
Set<BytesRef> stringsSet = new HashSet<>();
int iters = 0;
// Cap iterations: duplicates collapse in the set, so stringCount may never be reached.
while (stringsSet.size() < stringCount && iters < stringCount * 5) {
int count = TestUtil.nextInt(random(), 1, 5);
BytesRefBuilder b = new BytesRefBuilder();
for (int i = 0; i < count; i++) {
double v = random().nextDouble();
accum = 0.0;
for (int j = 0; j < substrings.length; j++) {
accum += chance[j];
if (accum >= v) {
b.append(substrings[j]);
break;
}
}
}
BytesRef br = b.toBytesRef();
stringsSet.add(br);
// System.out.println("add string count=" + stringsSet.size() + ": " + br);
iters++;
}
test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
}
}

View File

@ -1696,6 +1696,18 @@ public class TestBKD extends LuceneTestCase {
@Override
public byte getByteAt(int i, int k) {
// NOTE: allocates a fresh BytesRef per call; acceptable for this test stub.
BytesRef b = new BytesRef();
getValue(i, b);
return b.bytes[b.offset + k];
}

// The stable-sort hooks below are intentionally unsupported in this stub: calling
// them fails loudly instead of silently misbehaving.
@Override
public void save(int i, int j) {
throw new UnsupportedOperationException();
}

@Override
public void restore(int i, int j) {
throw new UnsupportedOperationException();
}
};
@ -1839,6 +1851,16 @@ public class TestBKD extends LuceneTestCase {
public int getDocCount() {
return 11;
}
// The stable-sort hooks are intentionally unsupported in this stub: calling them
// fails loudly instead of silently misbehaving.
@Override
public void save(int i, int j) {
throw new UnsupportedOperationException();
}

@Override
public void restore(int i, int j) {
throw new UnsupportedOperationException();
}
};
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
IllegalStateException ex =

View File

@ -28,16 +28,22 @@ import org.apache.lucene.util.TestUtil;
public class TestMutablePointsReaderUtils extends LuceneTestCase {
public void testSort() {
for (int iter = 0; iter < 5; ++iter) {
doTestSort();
for (int iter = 0; iter < 10; ++iter) {
doTestSort(false);
}
}
private void doTestSort() {
public void testSortWithIncrementalDocId() {
for (int iter = 0; iter < 10; ++iter) {
doTestSort(true);
}
}
private void doTestSort(boolean isDocIdIncremental) {
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
BKDConfig config = new BKDConfig(1, 1, bytesPerDim, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
Point[] points = createRandomPoints(config, maxDoc, new int[1]);
Point[] points = createRandomPoints(config, maxDoc, new int[1], isDocIdIncremental);
DummyPointsReader reader = new DummyPointsReader(points);
MutablePointsReaderUtils.sort(config, maxDoc, reader, 0, points.length);
Arrays.sort(
@ -53,7 +59,23 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
});
assertNotSame(points, reader.points);
assertArrayEquals(points, reader.points);
assertEquals(points.length, reader.points.length);
// Check doc IDs are in ascending order.
// If doc IDs are already increasing, StableMSBRadixSorter should keep doc ID's ordering.
// If doc IDs are not ordered, StableMSBRadixSorter should compare doc ID to guarantee the
// ordering.
Point prevPoint = null;
for (int i = 0; i < points.length; i++) {
assertEquals(points[i].packedValue, reader.points[i].packedValue);
assertSame(points[i].packedValue, reader.points[i].packedValue);
if (prevPoint != null) {
if (reader.points[i].packedValue.equals(prevPoint.packedValue)) {
assertTrue(reader.points[i].doc >= prevPoint.doc);
}
}
prevPoint = reader.points[i];
}
}
public void testSortByDim() {
@ -66,7 +88,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
BKDConfig config = createRandomConfig();
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
int[] commonPrefixLengths = new int[config.numDims];
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
DummyPointsReader reader = new DummyPointsReader(points);
final int sortedDim = random().nextInt(config.numIndexDims);
MutablePointsReaderUtils.sortByDim(
@ -119,7 +141,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
BKDConfig config = createRandomConfig();
int[] commonPrefixLengths = new int[config.numDims];
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
final int splitDim = random().nextInt(config.numIndexDims);
DummyPointsReader reader = new DummyPointsReader(points);
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
@ -180,15 +202,17 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
private static Point[] createRandomPoints(
BKDConfig config, int maxDoc, int[] commonPrefixLengths) {
BKDConfig config, int maxDoc, int[] commonPrefixLengths, boolean isDocIdIncremental) {
assertTrue(commonPrefixLengths.length == config.numDims);
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
Point[] points = new Point[numPoints];
if (random().nextInt(5) != 0) {
if (random().nextInt(10) != 0) {
for (int i = 0; i < numPoints; ++i) {
byte[] value = new byte[config.packedBytesLength];
random().nextBytes(value);
points[i] = new Point(value, random().nextInt(maxDoc));
points[i] =
new Point(
value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
}
for (int i = 0; i < config.numDims; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim);
@ -218,7 +242,8 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
random().nextBytes(dataDims);
System.arraycopy(
dataDims, 0, value, config.packedIndexBytesLength, numDataDims * config.bytesPerDim);
points[i] = new Point(value, random().nextInt(maxDoc));
points[i] =
new Point(value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
}
for (int i = 0; i < config.numIndexDims; ++i) {
commonPrefixLengths[i] = config.bytesPerDim;
@ -281,6 +306,8 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
private final Point[] points;
private Point[] temp;
DummyPointsReader(Point[] points) {
this.points = points.clone();
}
@ -352,5 +379,20 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
public int getDocCount() {
throw new UnsupportedOperationException();
}
@Override
public void save(int i, int j) {
// Lazily allocate the scratch buffer backing the save/restore contract.
if (temp == null) {
temp = new Point[points.length];
}
temp[j] = points[i];
}

@Override
public void restore(int i, int j) {
// No-op when save() was never called (nothing buffered).
if (temp != null) {
// Copy positions [i, j) from the scratch buffer back into points.
System.arraycopy(temp, i, points, i, j - i);
}
}
}
}