LUCENE-7399: Speed up flush of points.

This commit is contained in:
Adrien Grand 2016-07-29 17:32:29 +02:00
parent 234ea3ef89
commit 9fc4624853
16 changed files with 368 additions and 133 deletions

View File

@ -156,7 +156,8 @@ Optimizations
* LUCENE-7311: Cached term queries do not seek the terms dictionary anymore.
(Adrien Grand)
* LUCENE-7396: Faster flush of points. (Adrien Grand, Mike McCandless)
* LUCENE-7396, LUCENE-7399: Faster flush of points.
(Adrien Grand, Mike McCandless)
Other

View File

@ -16,6 +16,8 @@
*/
package org.apache.lucene.codecs;
import org.apache.lucene.util.BytesRef;
/** {@link PointsReader} whose order of points can be changed.
* This class is useful for codecs to optimize flush.
* @lucene.internal */
@ -24,8 +26,8 @@ public abstract class MutablePointsReader extends PointsReader {
/** Sole constructor. */
protected MutablePointsReader() {}
/** Fill {@code packedValue} with the packed bytes of the i-th value. */
public abstract void getValue(int i, byte[] packedValue);
/** Set {@code packedValue} with a reference to the packed bytes of the i-th value. */
public abstract void getValue(int i, BytesRef packedValue);
/** Get the k-th byte of the i-th value. */
public abstract byte getByteAt(int i, int k);

View File

@ -51,9 +51,10 @@ class PointValuesWriter {
if (value == null) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": point value must not be null");
}
if (value.length != fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()) {
if (value.length != packedBytesLength) {
throw new IllegalArgumentException("field=" + fieldInfo.name + ": this field's value has length=" + value.length + " but should be " + (fieldInfo.getPointDimensionCount() * fieldInfo.getPointNumBytes()));
}
if (docIDs.length == numPoints) {
docIDs = ArrayUtil.grow(docIDs, numPoints+1);
iwBytesUsed.addAndGet((docIDs.length - numPoints) * Integer.BYTES);
@ -64,6 +65,7 @@ class PointValuesWriter {
numDocs++;
lastDocID = docID;
}
numPoints++;
}
@ -82,9 +84,12 @@ class PointValuesWriter {
if (fieldName.equals(fieldInfo.name) == false) {
throw new IllegalArgumentException("fieldName must be the same");
}
final BytesRef scratch = new BytesRef();
final byte[] packedValue = new byte[packedBytesLength];
for(int i=0;i<numPoints;i++) {
getValue(i, packedValue);
getValue(i, scratch);
assert scratch.length == packedValue.length;
System.arraycopy(scratch.bytes, scratch.offset, packedValue, 0, packedBytesLength);
visitor.visit(getDocID(i), packedValue);
}
}
@ -152,9 +157,10 @@ class PointValuesWriter {
}
@Override
public void getValue(int i, byte[] packedValue) {
public void getValue(int i, BytesRef packedValue) {
final long offset = (long) packedBytesLength * ords[i];
bytes.readBytes(offset, packedValue, 0, packedBytesLength);
packedValue.length = packedBytesLength;
bytes.setRawBytesRef(packedValue, offset);
}
@Override

View File

@ -379,6 +379,27 @@ public final class ByteBlockPool {
} while (true);
}
/**
* Set the given {@link BytesRef} so that its content is equal to the
* {@code ref.length} bytes starting at {@code offset}. Most of the time this
* method will set pointers to internal data-structures. However, in case a
* value crosses a boundary, a fresh copy will be made.
* In contrast to {@link #setBytesRef(BytesRef, int)}, this method does not
* expect the length to be encoded with the data.
*/
public void setRawBytesRef(BytesRef ref, final long offset) {
// translate the absolute pool offset into a (buffer, position) pair
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);
int pos = (int) (offset & BYTE_BLOCK_MASK);
if (pos + ref.length <= BYTE_BLOCK_SIZE) {
// fast path: the value fits entirely inside one block, so point the ref
// directly at the pool's internal buffer without copying
ref.bytes = buffers[bufferIndex];
ref.offset = pos;
} else {
// slow path: the value crosses a block boundary, so copy it into a
// freshly allocated array
ref.bytes = new byte[ref.length];
ref.offset = 0;
readBytes(offset, ref.bytes, 0, ref.length);
}
}
/** Read a single byte at the given {@code offset}. */
public byte readByte(long offset) {
int bufferIndex = (int) (offset >> BYTE_BLOCK_SHIFT);

View File

@ -33,8 +33,8 @@ public abstract class InPlaceMergeSorter extends Sorter {
}
void mergeSort(int from, int to) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
} else {
final int mid = (from + to) >>> 1;
mergeSort(from, mid);

View File

@ -34,6 +34,8 @@ public abstract class IntroSelector extends Selector {
}
// heap sort
// TODO: use median of median instead to have linear worst-case rather than
// n*log(n)
void slowSelect(int from, int to, int k) {
new Sorter() {

View File

@ -37,8 +37,8 @@ public abstract class IntroSorter extends Sorter {
}
void quicksort(int from, int to, int maxDepth) {
if (to - from < INSERTION_SORT_THRESHOLD) {
insertionSort(from, to);
if (to - from < BINARY_SORT_THRESHOLD) {
binarySort(from, to);
return;
} else if (--maxDepth < 0) {
heapSort(from, to);
@ -83,12 +83,13 @@ public abstract class IntroSorter extends Sorter {
quicksort(left + 1, to, maxDepth);
}
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
// Don't rely on the slow default impl of setPivot/comparePivot since
// quicksort relies on these methods to be fast for good performance
@Override
protected abstract void setPivot(int i);
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
@Override
protected abstract int comparePivot(int j);
@Override

View File

@ -38,6 +38,7 @@ public abstract class MSBRadixSorter extends Sorter {
// we store one histogram per recursion level
private final int[][] histograms = new int[LEVEL_THRESHOLD][];
private final int[] endOffsets = new int[HISTOGRAM_SIZE];
private final int[] commonPrefix;
private final int maxLength;
@ -47,6 +48,7 @@ public abstract class MSBRadixSorter extends Sorter {
*/
protected MSBRadixSorter(int maxLength) {
this.maxLength = maxLength;
this.commonPrefix = new int[Math.min(24, maxLength)];
}
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
@ -116,14 +118,14 @@ public abstract class MSBRadixSorter extends Sorter {
@Override
public void sort(int from, int to) {
checkRange(from, to);
sort(from, to, 0);
sort(from, to, 0, 0);
}
private void sort(int from, int to, int k) {
if (to - from <= LENGTH_THRESHOLD || k >= LEVEL_THRESHOLD) {
private void sort(int from, int to, int k, int l) {
if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
introSort(from, to, k);
} else {
radixSort(from, to, k);
radixSort(from, to, k, l);
}
}
@ -131,28 +133,30 @@ public abstract class MSBRadixSorter extends Sorter {
getFallbackSorter(k).sort(from, to);
}
private void radixSort(int from, int to, int k) {
int[] histogram = histograms[k];
/**
* @param k the character number to compare
* @param l the level of recursion
*/
private void radixSort(int from, int to, int k, int l) {
int[] histogram = histograms[l];
if (histogram == null) {
histogram = histograms[k] = new int[HISTOGRAM_SIZE];
histogram = histograms[l] = new int[HISTOGRAM_SIZE];
} else {
Arrays.fill(histogram, 0);
}
buildHistogram(from, to, k, histogram);
// short-circuit: if all keys have the same byte at offset k, then recurse directly
for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
if (histogram[i] == to - from) {
// everything is in the same bucket, recurse
if (i > 0) {
sort(from, to, k + 1);
}
return;
} else if (histogram[i] != 0) {
break;
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, k, histogram);
if (commonPrefixLength > 0) {
// if there are no more chars to compare or if all entries fell into the
// first bucket (which means strings are shorter than k) then we are done
// otherwise recurse
if (k + commonPrefixLength < maxLength
&& histogram[0] < to - from) {
radixSort(from, to, k + commonPrefixLength, l);
}
return;
}
assert assertHistogram(commonPrefixLength, histogram);
int[] startOffsets = histogram;
int[] endOffsets = this.endOffsets;
@ -167,24 +171,83 @@ public abstract class MSBRadixSorter extends Sorter {
int h = endOffsets[i];
final int bucketLen = h - prev;
if (bucketLen > 1) {
sort(from + prev, from + h, k + 1);
sort(from + prev, from + h, k + 1, l + 1);
}
prev = h;
}
}
}
// only used from assert
// Sanity-checks the invariant between the computed common prefix and the
// histogram: a positive common prefix length implies that all visited values
// fell into a single bucket (one non-empty histogram entry), and vice versa.
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
int numberOfUniqueBytes = 0;
for (int freq : histogram) {
if (freq > 0) {
numberOfUniqueBytes++;
}
}
if (numberOfUniqueBytes == 1) {
// all values share the same byte at this offset, so the common prefix
// must cover at least that byte
assert commonPrefixLength >= 1;
} else {
// values diverge at this offset, so no common prefix may be reported
assert commonPrefixLength == 0 : commonPrefixLength;
}
// always true so the method can be invoked inside an assert statement
return true;
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int i, int k) {
return byteAt(i, k) + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
* and return a common prefix length for all visited values.
* @see #buildHistogram */
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
final int[] commonPrefix = this.commonPrefix;
// the candidate prefix is bounded by both the scratch buffer capacity and
// the number of bytes left before maxLength
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
// seed the candidate common prefix from the first value
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(from, k + j);
commonPrefix[j] = b;
if (b == -1) {
// end-of-value sentinel: the prefix cannot extend past it
commonPrefixLength = j + 1;
break;
}
}
int i;
// shrink the candidate prefix against every other value in the range
outer: for (i = from + 1; i < to; ++i) {
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(i, k + j);
if (b != commonPrefix[j]) {
commonPrefixLength = j;
if (commonPrefixLength == 0) { // we have no common prefix
// record what we already know: the first i - from values share
// commonPrefix[0] at offset k, and value i has byte b there
histogram[commonPrefix[0] + 1] = i - from;
histogram[b + 1] = 1;
break outer;
}
break;
}
}
}
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;
// count the remaining values with the plain histogram pass
buildHistogram(i + 1, to, k, histogram);
} else {
assert commonPrefixLength > 0;
// all values share their byte at offset k, so they all land in one bucket
histogram[commonPrefix[0] + 1] = to - from;
}
return commonPrefixLength;
}
/** Build a histogram of the k-th characters of values occurring between
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
private void buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
return histogram;
}
/** Accumulate values of the histogram so that it does not store counts but

View File

@ -36,6 +36,7 @@ public abstract class RadixSelector extends Selector {
// we store one histogram per recursion level
private final int[] histogram = new int[HISTOGRAM_SIZE];
private final int[] commonPrefix;
private final int maxLength;
@ -45,6 +46,7 @@ public abstract class RadixSelector extends Selector {
*/
protected RadixSelector(int maxLength) {
this.maxLength = maxLength;
this.commonPrefix = new int[Math.min(24, maxLength)];
}
/** Return the k-th byte of the entry at index {@code i}, or {@code -1} if
@ -112,22 +114,37 @@ public abstract class RadixSelector extends Selector {
@Override
public void select(int from, int to, int k) {
checkArgs(from, to, k);
select(from, to, k, 0);
select(from, to, k, 0, 0);
}
private void select(int from, int to, int k, int d) {
private void select(int from, int to, int k, int d, int l) {
if (to - from <= LENGTH_THRESHOLD || d >= LEVEL_THRESHOLD) {
getFallbackSelector(d).select(from, to, k);
} else {
radixSelect(from, to, k, d);
radixSelect(from, to, k, d, l);
}
}
private void radixSelect(int from, int to, int k, int d) {
/**
* @param d the character number to compare
* @param l the level of recursion
*/
private void radixSelect(int from, int to, int k, int d, int l) {
final int[] histogram = this.histogram;
Arrays.fill(histogram, 0);
buildHistogram(from, to, d, histogram);
final int commonPrefixLength = computeCommonPrefixLengthAndBuildHistogram(from, to, d, histogram);
if (commonPrefixLength > 0) {
// if there are no more chars to compare or if all entries fell into the
// first bucket (which means strings are shorter than d) then we are done
// otherwise recurse
if (d + commonPrefixLength < maxLength
&& histogram[0] < to - from) {
radixSelect(from, to, k, d + commonPrefixLength, l);
}
return;
}
assert assertHistogram(commonPrefixLength, histogram);
int bucketFrom = from;
for (int bucket = 0; bucket < HISTOGRAM_SIZE; ++bucket) {
@ -138,7 +155,7 @@ public abstract class RadixSelector extends Selector {
if (bucket != 0 && d + 1 < maxLength) {
// all elements in bucket 0 are equal so we only need to recurse if bucket != 0
select(bucketFrom, bucketTo, k, d + 1);
select(bucketFrom, bucketTo, k, d + 1, l + 1);
}
return;
}
@ -147,17 +164,76 @@ public abstract class RadixSelector extends Selector {
throw new AssertionError("Unreachable code");
}
// only used from assert
// Sanity-checks the invariant between the computed common prefix and the
// histogram: a positive common prefix length implies that all visited values
// fell into a single bucket (one non-empty histogram entry), and vice versa.
private boolean assertHistogram(int commonPrefixLength, int[] histogram) {
int numberOfUniqueBytes = 0;
for (int freq : histogram) {
if (freq > 0) {
numberOfUniqueBytes++;
}
}
if (numberOfUniqueBytes == 1) {
// all values share the same byte at this offset, so the common prefix
// must cover at least that byte
assert commonPrefixLength >= 1;
} else {
// values diverge at this offset, so no common prefix may be reported
assert commonPrefixLength == 0;
}
// always true so the method can be invoked inside an assert statement
return true;
}
/** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
private int getBucket(int i, int k) {
return byteAt(i, k) + 1;
}
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}. */
private int[] buildHistogram(int from, int to, int k, int[] histogram) {
/** Build a histogram of the number of values per {@link #getBucket(int, int) bucket}
* and return a common prefix length for all visited values.
* @see #buildHistogram */
private int computeCommonPrefixLengthAndBuildHistogram(int from, int to, int k, int[] histogram) {
final int[] commonPrefix = this.commonPrefix;
// the candidate prefix is bounded by both the scratch buffer capacity and
// the number of bytes left before maxLength
int commonPrefixLength = Math.min(commonPrefix.length, maxLength - k);
// seed the candidate common prefix from the first value
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(from, k + j);
commonPrefix[j] = b;
if (b == -1) {
// end-of-value sentinel: the prefix cannot extend past it
commonPrefixLength = j + 1;
break;
}
}
int i;
// shrink the candidate prefix against every other value in the range
outer: for (i = from + 1; i < to; ++i) {
for (int j = 0; j < commonPrefixLength; ++j) {
final int b = byteAt(i, k + j);
if (b != commonPrefix[j]) {
commonPrefixLength = j;
if (commonPrefixLength == 0) { // we have no common prefix
// record what we already know: the first i - from values share
// commonPrefix[0] at offset k, and value i has byte b there
histogram[commonPrefix[0] + 1] = i - from;
histogram[b + 1] = 1;
break outer;
}
break;
}
}
}
if (i < to) {
// the loop got broken because there is no common prefix
assert commonPrefixLength == 0;
// count the remaining values with the plain histogram pass
buildHistogram(i + 1, to, k, histogram);
} else {
assert commonPrefixLength > 0;
// all values share their byte at offset k, so they all land in one bucket
histogram[commonPrefix[0] + 1] = to - from;
}
return commonPrefixLength;
}
/** Build a histogram of the k-th characters of values occurring between
* offsets {@code from} and {@code to}, using {@link #getBucket}. */
private void buildHistogram(int from, int to, int k, int[] histogram) {
for (int i = from; i < to; ++i) {
histogram[getBucket(i, k)]++;
}
return histogram;
}
/** Reorder elements so that all of them that fall into {@code bucket} are

View File

@ -23,7 +23,7 @@ import java.util.Comparator;
* @lucene.internal */
public abstract class Sorter {
static final int INSERTION_SORT_THRESHOLD = 20;
static final int BINARY_SORT_THRESHOLD = 20;
/** Sole constructor, used for inheritance. */
protected Sorter() {}
@ -36,6 +36,20 @@ public abstract class Sorter {
/** Swap values at slots <code>i</code> and <code>j</code>. */
protected abstract void swap(int i, int j);
private int pivotIndex;
/** Save the value at slot <code>i</code> so that it can later be used as a
* pivot, see {@link #comparePivot(int)}. */
protected void setPivot(int i) {
pivotIndex = i;
}
/** Compare the pivot with the slot at <code>j</code>, similarly to
* {@link #compare(int, int) compare(i, j)}. */
protected int comparePivot(int j) {
return compare(pivotIndex, j);
}
/** Sort the slice which starts at <code>from</code> (inclusive) and ends at
* <code>to</code> (exclusive). */
public abstract void sort(int from, int to);
@ -163,54 +177,41 @@ public abstract class Sorter {
}
}
void insertionSort(int from, int to) {
for (int i = from + 1; i < to; ++i) {
for (int j = i; j > from; --j) {
if (compare(j - 1, j) > 0) {
swap(j - 1, j);
} else {
break;
}
}
}
}
/**
* A binary sort implementation. This performs {@code O(n*log(n))} comparisons
* and {@code O(n^2)} swaps. It is typically used by more sophisticated
* implementations as a fall-back when the numbers of items to sort has become
* less than {@value #BINARY_SORT_THRESHOLD}.
*/
void binarySort(int from, int to) {
binarySort(from, to, from + 1);
}
void binarySort(int from, int to, int i) {
for ( ; i < to; ++i) {
setPivot(i);
int l = from;
int h = i - 1;
while (l <= h) {
final int mid = (l + h) >>> 1;
final int cmp = compare(i, mid);
final int cmp = comparePivot(mid);
if (cmp < 0) {
h = mid - 1;
} else {
l = mid + 1;
}
}
switch (i - l) {
case 2:
swap(l + 1, l + 2);
swap(l, l + 1);
break;
case 1:
swap(l, l + 1);
break;
case 0:
break;
default:
for (int j = i; j > l; --j) {
swap(j - 1, j);
}
break;
for (int j = i; j > l; --j) {
swap(j - 1, j);
}
}
}
/**
* Use heap sort to sort items between {@code from} inclusive and {@code to}
* exclusive. This runs in {@code O(n*log(n))} and is used as a fall-back by
* {@link IntroSorter}.
*/
void heapSort(int from, int to) {
if (to - from <= 1) {
return;

View File

@ -112,7 +112,8 @@ public class BKDWriter implements Closeable {
final byte[] scratchDiff;
final byte[] scratch1;
final byte[] scratch2;
final BytesRef scratchBytesRef = new BytesRef();
final BytesRef scratchBytesRef1 = new BytesRef();
final BytesRef scratchBytesRef2 = new BytesRef();
final int[] commonPrefixLengths;
protected final FixedBitSet docsSeen;
@ -174,7 +175,6 @@ public class BKDWriter implements Closeable {
packedBytesLength = numDims * bytesPerDim;
scratchDiff = new byte[bytesPerDim];
scratchBytesRef.length = packedBytesLength;
scratch1 = new byte[packedBytesLength];
scratch2 = new byte[packedBytesLength];
commonPrefixLengths = new int[numDims];
@ -465,14 +465,14 @@ public class BKDWriter implements Closeable {
Arrays.fill(minPackedValue, (byte) 0xff);
Arrays.fill(maxPackedValue, (byte) 0);
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
reader.getValue(i, scratch1);
reader.getValue(i, scratchBytesRef1);
for(int dim=0;dim<numDims;dim++) {
int offset = dim*bytesPerDim;
if (StringHelper.compare(bytesPerDim, scratch1, offset, minPackedValue, offset) < 0) {
System.arraycopy(scratch1, offset, minPackedValue, offset, bytesPerDim);
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset) < 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
}
if (StringHelper.compare(bytesPerDim, scratch1, offset, maxPackedValue, offset) > 0) {
System.arraycopy(scratch1, offset, maxPackedValue, offset, bytesPerDim);
if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset) > 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
}
}
@ -811,6 +811,7 @@ public class BKDWriter implements Closeable {
}
private PointWriter sort(int dim) throws IOException {
assert dim >= 0 && dim < numDims;
if (heapPointWriter != null) {
@ -1251,13 +1252,13 @@ public class BKDWriter implements Closeable {
// Compute common prefixes
Arrays.fill(commonPrefixLengths, bytesPerDim);
reader.getValue(from, scratch1);
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratch2);
reader.getValue(i, scratchBytesRef2);
for (int dim=0;dim<numDims;dim++) {
final int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (scratch1[offset+j] != scratch2[offset+j]) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
@ -1294,7 +1295,7 @@ public class BKDWriter implements Closeable {
// sort by sortedDim
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratch1, scratch2);
reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer:
leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
@ -1307,22 +1308,16 @@ public class BKDWriter implements Closeable {
writeLeafBlockDocs(out, docIDs, 0, count);
// Write the common prefixes:
reader.getValue(from, scratch1);
reader.getValue(from, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
writeCommonPrefixes(out, commonPrefixLengths, scratch1);
// Write the full values:
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
final BytesRef scratch = new BytesRef(packedBytesLength);
{
scratch.offset = 0;
scratch.length = packedBytesLength;
}
@Override
public BytesRef apply(int i) {
reader.getValue(from + i, scratch.bytes);
return scratch;
reader.getValue(from + i, scratchBytesRef1);
return scratchBytesRef1;
}
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
@ -1344,18 +1339,20 @@ public class BKDWriter implements Closeable {
}
}
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratch1, scratch2);
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value
final int address = nodeID * (1+bytesPerDim);
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratch1);
System.arraycopy(scratch1, splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
System.arraycopy(scratch1, splitDim * bytesPerDim, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratch1, splitDim * bytesPerDim, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
// recurse
build(nodeID * 2, leafNodeOffset, reader, from, mid, out,

View File

@ -17,6 +17,7 @@
package org.apache.lucene.util.bkd;
import org.apache.lucene.codecs.MutablePointsReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntroSelector;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.MSBRadixSorter;
@ -54,8 +55,8 @@ final class MutablePointsReaderUtils {
protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
return new IntroSorter() {
final byte[] pivot = new byte[packedBytesLength];
final byte[] scratch = new byte[packedBytesLength];
final BytesRef pivot = new BytesRef();
final BytesRef scratch = new BytesRef();
int pivotDoc;
@Override
@ -73,7 +74,7 @@ final class MutablePointsReaderUtils {
protected int comparePivot(int j) {
if (k < packedBytesLength) {
reader.getValue(j, scratch);
int cmp = StringHelper.compare(packedBytesLength - k, pivot, k, scratch, k);
int cmp = StringHelper.compare(packedBytesLength - k, pivot.bytes, pivot.offset + k, scratch.bytes, scratch.offset + k);
if (cmp != 0) {
return cmp;
}
@ -89,7 +90,7 @@ final class MutablePointsReaderUtils {
/** Sort points on the given dimension. */
static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
MutablePointsReader reader, int from, int to,
byte[] scratch1, byte[] scratch2) {
BytesRef scratch1, BytesRef scratch2) {
// No need for a fancy radix sort here, this is called on the leaves only so
// there are not many values to sort
@ -97,7 +98,7 @@ final class MutablePointsReaderUtils {
final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
new IntroSorter() {
final byte[] pivot = scratch1;
final BytesRef pivot = scratch1;
int pivotDoc = -1;
@Override
@ -114,7 +115,7 @@ final class MutablePointsReaderUtils {
@Override
protected int comparePivot(int j) {
reader.getValue(j, scratch2);
int cmp = StringHelper.compare(numBytesToCompare, pivot, offset, scratch2, offset);
int cmp = StringHelper.compare(numBytesToCompare, pivot.bytes, pivot.offset + offset, scratch2.bytes, scratch2.offset + offset);
if (cmp == 0) {
cmp = pivotDoc - reader.getDocID(j);
}
@ -128,7 +129,7 @@ final class MutablePointsReaderUtils {
* equal to it. */
static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
MutablePointsReader reader, int from, int to, int mid,
byte[] scratch1, byte[] scratch2) {
BytesRef scratch1, BytesRef scratch2) {
final int offset = splitDim * bytesPerDim + commonPrefixLen;
final int cmpBytes = bytesPerDim - commonPrefixLen;
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
@ -138,7 +139,7 @@ final class MutablePointsReaderUtils {
protected Selector getFallbackSelector(int k) {
return new IntroSelector() {
final byte[] pivot = scratch1;
final BytesRef pivot = scratch1;
int pivotDoc;
@Override
@ -156,7 +157,7 @@ final class MutablePointsReaderUtils {
protected int comparePivot(int j) {
if (k < cmpBytes) {
reader.getValue(j, scratch2);
int cmp = StringHelper.compare(cmpBytes - k, pivot, offset + k, scratch2, offset + k);
int cmp = StringHelper.compare(cmpBytes - k, pivot.bytes, pivot.offset + offset + k, scratch2.bytes, scratch2.offset + offset + k);
if (cmp != 0) {
return cmp;
}

View File

@ -45,12 +45,25 @@ public class TestByteBlockPool extends LuceneTestCase {
for (BytesRef expected : list) {
ref.grow(expected.length);
ref.setLength(expected.length);
if (random().nextBoolean()) {
pool.readBytes(position, ref.bytes(), 0, ref.length());
} else {
for (int i = 0; i < ref.length(); ++i) {
ref.setByteAt(i, pool.readByte(position + i));
}
switch (random().nextInt(3)) {
case 0:
// copy bytes
pool.readBytes(position, ref.bytes(), 0, ref.length());
break;
case 1:
// copy bytes one by one
for (int i = 0; i < ref.length(); ++i) {
ref.setByteAt(i, pool.readByte(position + i));
}
break;
case 2:
BytesRef scratch = new BytesRef();
scratch.length = ref.length();
pool.setRawBytesRef(scratch, position);
System.arraycopy(scratch.bytes, scratch.offset, ref.bytes(), 0, ref.length());
break;
default:
fail();
}
assertEquals(expected, ref.get());
position += ref.length();

View File

@ -41,9 +41,12 @@ public class TestMSBRadixSorter extends LuceneTestCase {
break;
}
final int finalMaxLength = maxLength;
new MSBRadixSorter(maxLength) {
@Override
protected int byteAt(int i, int k) {
assertTrue(k < finalMaxLength);
BytesRef ref = refs[i];
if (ref.length <= k) {
return -1;

View File

@ -36,13 +36,41 @@ public class TestRadixSelector extends LuceneTestCase {
random().nextBytes(bytes);
arr[i] = new BytesRef(bytes);
}
doTest(arr, from, to, maxLen);
}
public void testSharedPrefixes() {
for (int iter = 0; iter < 100; ++iter) {
doTestSharedPrefixes();
}
}
private void doTestSharedPrefixes() {
final int from = random().nextInt(5);
final int to = from + TestUtil.nextInt(random(), 1, 10000);
final int maxLen = TestUtil.nextInt(random(), 1, 12);
BytesRef[] arr = new BytesRef[from + to + random().nextInt(5)];
for (int i = 0; i < arr.length; ++i) {
byte[] bytes = new byte[TestUtil.nextInt(random(), 0, maxLen)];
random().nextBytes(bytes);
arr[i] = new BytesRef(bytes);
}
final int sharedPrefixLength = Math.min(arr[0].length, TestUtil.nextInt(random(), 1, maxLen));
for (int i = 1; i < arr.length; ++i) {
System.arraycopy(arr[0].bytes, arr[0].offset, arr[i].bytes, arr[i].offset, Math.min(sharedPrefixLength, arr[i].length));
}
doTest(arr, from, to, maxLen);
}
private void doTest(BytesRef[] arr, int from, int to, int maxLen) {
final int k = TestUtil.nextInt(random(), from, to - 1);
BytesRef[] expected = arr.clone();
Arrays.sort(expected, from, to);
BytesRef[] actual = arr.clone();
RadixSelector selector = new RadixSelector(random().nextBoolean() ? maxLen : Integer.MAX_VALUE) {
final int enforcedMaxLen = random().nextBoolean() ? maxLen : Integer.MAX_VALUE;
RadixSelector selector = new RadixSelector(enforcedMaxLen) {
@Override
protected void swap(int i, int j) {
@ -51,6 +79,7 @@ public class TestRadixSelector extends LuceneTestCase {
@Override
protected int byteAt(int i, int k) {
assertTrue(k < enforcedMaxLen);
BytesRef b = actual[i];
if (k >= b.length) {
return -1;

View File

@ -44,7 +44,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
Arrays.sort(points, new Comparator<Point>() {
@Override
public int compare(Point o1, Point o2) {
int cmp = StringHelper.compare(bytesPerDim, o1.packedValue, 0, o2.packedValue, 0);
int cmp = o1.packedValue.compareTo(o2.packedValue);
if (cmp == 0) {
cmp = Integer.compare(o1.doc, o2.doc);
}
@ -70,19 +70,25 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
for (int i = 0; i < commonPrefixLengths.length; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = 0; dim < numDims; ++dim) {
int offset = dim * bytesPerDim;
System.arraycopy(points[0].packedValue, offset, points[i].packedValue, offset, commonPrefixLengths[dim]);
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}
}
DummyPointsReader reader = new DummyPointsReader(points);
final int sortedDim = random().nextInt(numDims);
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
new byte[numDims * bytesPerDim], new byte[numDims * bytesPerDim]);
new BytesRef(), new BytesRef());
for (int i = 1; i < points.length; ++i) {
final int offset = sortedDim * bytesPerDim;
int cmp = StringHelper.compare(bytesPerDim, reader.points[i-1].packedValue, offset, reader.points[i].packedValue, offset);
BytesRef previousValue = reader.points[i-1].packedValue;
BytesRef currentValue = reader.points[i].packedValue;
int cmp = StringHelper.compare(bytesPerDim,
previousValue.bytes, previousValue.offset + offset,
currentValue.bytes, currentValue.offset + offset);
if (cmp == 0) {
cmp = reader.points[i - 1].doc - reader.points[i].doc;
}
@ -103,17 +109,23 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
final int splitDim = random().nextInt(numDims);
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
BytesRef packedValue = points[i].packedValue;
int offset = splitDim * bytesPerDim;
System.arraycopy(points[0].packedValue, offset, points[i].packedValue, offset, commonPrefixLength);
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
}
DummyPointsReader reader = new DummyPointsReader(points);
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
new byte[numDims * bytesPerDim], new byte[numDims * bytesPerDim]);
new BytesRef(), new BytesRef());
BytesRef pivotValue = reader.points[pivot].packedValue;
int offset = splitDim * bytesPerDim;
for (int i = 0; i < points.length; ++i) {
int cmp = StringHelper.compare(bytesPerDim, reader.points[i].packedValue, offset, reader.points[pivot].packedValue, offset);
BytesRef value = reader.points[i].packedValue;
int cmp = StringHelper.compare(bytesPerDim,
value.bytes, value.offset + offset,
pivotValue.bytes, pivotValue.offset + offset);
if (cmp == 0) {
cmp = reader.points[i].doc - reader.points[pivot].doc;
}
@ -140,11 +152,15 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
private static class Point {
final byte[] packedValue;
final BytesRef packedValue;
final int doc;
Point(byte[] packedValue, int doc) {
this.packedValue = packedValue;
// use a non-null offset to make sure MutablePointsReaderUtils does not ignore it
this.packedValue = new BytesRef(packedValue.length + 1);
this.packedValue.bytes[0] = (byte) random().nextInt(256);
this.packedValue.offset = 1;
this.packedValue.length = packedValue.length;
this.doc = doc;
}
@ -154,17 +170,17 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
return false;
}
Point that = (Point) obj;
return Arrays.equals(packedValue, that.packedValue) && doc == that.doc;
return packedValue.equals(that.packedValue) && doc == that.doc;
}
@Override
public int hashCode() {
return 31 * Arrays.hashCode(packedValue) + doc;
return 31 * packedValue.hashCode() + doc;
}
@Override
public String toString() {
return "value=" + new BytesRef(packedValue) + " doc=" + doc;
return "value=" + packedValue + " doc=" + doc;
}
}
@ -187,13 +203,16 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
@Override
public void getValue(int i, byte[] packedValue) {
System.arraycopy(points[i].packedValue, 0, packedValue, 0, points[i].packedValue.length);
public void getValue(int i, BytesRef packedValue) {
packedValue.bytes = points[i].packedValue.bytes;
packedValue.offset = points[i].packedValue.offset;
packedValue.length = points[i].packedValue.length;
}
@Override
public byte getByteAt(int i, int k) {
return points[i].packedValue[k];
BytesRef packedValue = points[i].packedValue;
return packedValue.bytes[packedValue.offset + k];
}
@Override