From 5bf6cf2eddf60a0d2696f31b9a252eb7af6f9c32 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Thu, 4 Jul 2019 10:50:23 +0200 Subject: [PATCH] LUCENE-8888: Improve distribution of points with data dimensions in BKD tree leaves (#747) --- lucene/CHANGES.txt | 3 + .../simpletext/SimpleTextBKDWriter.java | 31 ++-- .../lucene/util/bkd/BKDRadixSelector.java | 140 ++++++++++-------- .../org/apache/lucene/util/bkd/BKDWriter.java | 33 +++-- .../lucene/util/bkd/HeapPointReader.java | 39 +++-- .../lucene/util/bkd/HeapPointWriter.java | 50 +++---- .../util/bkd/MutablePointsReaderUtils.java | 53 +++++-- .../lucene/util/bkd/OfflinePointReader.java | 22 +-- .../lucene/util/bkd/OfflinePointWriter.java | 9 +- .../apache/lucene/util/bkd/PointValue.java | 7 +- .../org/apache/lucene/util/bkd/TestBKD.java | 74 +++++++++ .../lucene/util/bkd/TestBKDRadixSelector.java | 68 ++++++++- .../lucene/util/bkd/TestBKDRadixSort.java | 7 +- .../bkd/TestMutablePointsReaderUtils.java | 112 +++++++++----- 14 files changed, 443 insertions(+), 205 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 1ed979cc336..a56bc832787 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -175,6 +175,9 @@ Optimizations * LUCENE-8901: Load frequencies lazily only when needed in BlockDocsEnum and BlockImpactsEverythingEnum (Mayya Sharipova). +* LUCENE-8888: Optimize distribution of points with data dimensions in + BKD tree leaves. 
(Ignacio Vera) + Test Framework * LUCENE-8825: CheckHits now display the shard index in case of mismatch diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java index aefa00c444c..ec41d874c94 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java @@ -596,7 +596,7 @@ final class SimpleTextBKDWriter implements Closeable { assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode; //We re-use the selector so we do not need to create an object every time. - BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix); + BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix); boolean success = false; try { @@ -604,7 +604,7 @@ final class SimpleTextBKDWriter implements Closeable { build(1, numLeaves, points, out, radixSelector, minPackedValue, maxPackedValue, - splitPackedValues, leafBlockFPs); + splitPackedValues, leafBlockFPs, new int[maxPointsInLeafNode]); // If no exception, we should have cleaned everything up: @@ -876,7 +876,7 @@ final class SimpleTextBKDWriter implements Closeable { } // sort by sortedDim - MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, + MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2); // Save the block file pointer: @@ -919,7 +919,7 @@ final class SimpleTextBKDWriter implements Closeable { break; } } - MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, + 
MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2); // set the split value @@ -950,7 +950,8 @@ final class SimpleTextBKDWriter implements Closeable { BKDRadixSelector radixSelector, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, - long[] leafBlockFPs) throws IOException { + long[] leafBlockFPs, + int[] spareDocIds) throws IOException { if (nodeID >= leafNodeOffset) { @@ -1009,7 +1010,12 @@ final class SimpleTextBKDWriter implements Closeable { // loading the values: int count = to - from; assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset; - writeLeafBlockDocs(out, heapSource.docIDs, from, count); + // Write doc IDs + int[] docIDs = spareDocIds; + for (int i = 0; i < count; i++) { + docIDs[i] = heapSource.getPackedValueSlice(from + i).docID(); + } + writeLeafBlockDocs(out, spareDocIds, 0, count); // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us // from the index, much like how terms dict does so from the FST: @@ -1029,7 +1035,7 @@ final class SimpleTextBKDWriter implements Closeable { } }; assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, - heapSource.docIDs, from); + docIDs, 0); writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues); } else { @@ -1074,12 +1080,12 @@ final class SimpleTextBKDWriter implements Closeable { // Recurse on left tree: build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector, - minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs); + minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds); // TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right? 
// Recurse on right tree: build(2*nodeID+1, leafNodeOffset, pathSlices[1], out, radixSelector, - minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs); + minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds); } } @@ -1131,6 +1137,13 @@ final class SimpleTextBKDWriter implements Closeable { if (cmp > 0) { throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim); } + if (cmp == 0 && numDataDims > numIndexDims) { + int dataOffset = numIndexDims * bytesPerDim; + cmp = Arrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength); + if (cmp > 0) { + throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord); + } + } if (cmp == 0 && doc < lastDoc) { throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim); } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java index 84fce3fa8e7..5a271103dbb 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java @@ -47,6 +47,8 @@ public final class BKDRadixSelector { private final int bytesSorted; //data dimensions size private final int packedBytesLength; + // data dimensions plus docID size + private final int packedBytesDocIDLength; //flag to when we are moving to sort on heap private final int maxPointsSortInHeap; //reusable buffer @@ -59,18 +61,26 @@ public final class BKDRadixSelector { private final Directory 
tempDir; // prefix for temp files private final String tempFileNamePrefix; + // data and index dimensions + private final int numDataDims, numIndexDims; /** * Sole constructor. */ - public BKDRadixSelector(int numDim, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) { + public BKDRadixSelector(int numDataDims, int numIndexDims, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) { this.bytesPerDim = bytesPerDim; - this.packedBytesLength = numDim * bytesPerDim; - this.bytesSorted = bytesPerDim + Integer.BYTES; + this.numDataDims = numDataDims; + this.numIndexDims = numIndexDims; + this.packedBytesLength = numDataDims * bytesPerDim; + this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES; + // Selection and sorting is done in a given dimension. In case the value of the dimension are equal + // between two points we tie break first using the data-only dimensions and if those are still equal + // we tie-break on the docID. Here we account for all bytes used in the process. 
+ this.bytesSorted = bytesPerDim + (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES; this.maxPointsSortInHeap = maxPointsSortInHeap; - int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / (packedBytesLength + Integer.BYTES); - this.offlineBuffer = new byte[numberOfPointsOffline * (packedBytesLength + Integer.BYTES)]; + int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / packedBytesDocIDLength; + this.offlineBuffer = new byte[numberOfPointsOffline * packedBytesDocIDLength]; this.partitionBucket = new int[bytesSorted]; this.histogram = new long[HISTOGRAM_SIZE]; this.scratch = new byte[bytesSorted]; @@ -133,12 +143,12 @@ public final class BKDRadixSelector { assert commonPrefixPosition > dimCommonPrefix; reader.next(); PointValue pointValue = reader.pointValue(); + BytesRef packedValueDocID = pointValue.packedValueDocIDBytes(); // copy dimension - BytesRef packedValue = pointValue.packedValue(); - System.arraycopy(packedValue.bytes, packedValue.offset + offset, scratch, 0, bytesPerDim); - // copy docID - BytesRef docIDBytes = pointValue.docIDBytes(); - System.arraycopy(docIDBytes.bytes, docIDBytes.offset, scratch, bytesPerDim, Integer.BYTES); + System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, bytesPerDim); + // copy data dimensions and docID + System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + numIndexDims * bytesPerDim, scratch, bytesPerDim, (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES); + for (long i = from + 1; i < to; i++) { reader.next(); pointValue = reader.pointValue(); @@ -156,13 +166,15 @@ public final class BKDRadixSelector { //check common prefix and adjust histogram final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix; final int endIndex = (commonPrefixPosition > bytesPerDim) ? 
bytesPerDim : commonPrefixPosition; - packedValue = pointValue.packedValue(); - int j = Arrays.mismatch(scratch, startIndex, endIndex, packedValue.bytes, packedValue.offset + offset + startIndex, packedValue.offset + offset + endIndex); + packedValueDocID = pointValue.packedValueDocIDBytes(); + int j = Arrays.mismatch(scratch, startIndex, endIndex, packedValueDocID.bytes, packedValueDocID.offset + offset + startIndex, packedValueDocID.offset + offset + endIndex); if (j == -1) { if (commonPrefixPosition > bytesPerDim) { - //tie-break on docID - docIDBytes = pointValue.docIDBytes(); - int k = Arrays.mismatch(scratch, bytesPerDim, commonPrefixPosition, docIDBytes.bytes, docIDBytes.offset, docIDBytes.offset + commonPrefixPosition - bytesPerDim); + //tie-break on data dimensions + docID + final int startTieBreak = numIndexDims * bytesPerDim; + final int endTieBreak = startTieBreak + commonPrefixPosition - bytesPerDim; + int k = Arrays.mismatch(scratch, bytesPerDim, commonPrefixPosition, + packedValueDocID.bytes, packedValueDocID.offset + startTieBreak, packedValueDocID.offset + endTieBreak); if (k != -1) { commonPrefixPosition = bytesPerDim + k; Arrays.fill(histogram, 0); @@ -194,8 +206,8 @@ public final class BKDRadixSelector { BytesRef packedValue = pointValue.packedValue(); bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff; } else { - BytesRef docIDValue = pointValue.docIDBytes(); - bucket = docIDValue.bytes[docIDValue.offset + commonPrefixPosition - bytesPerDim] & 0xff; + BytesRef packedValueDocID = pointValue.packedValueDocIDBytes(); + bucket = packedValueDocID.bytes[packedValueDocID.offset + numIndexDims * bytesPerDim + commonPrefixPosition - bytesPerDim] & 0xff; } return bucket; } @@ -309,10 +321,11 @@ public final class BKDRadixSelector { return partition; } - private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefix) { - final int offset = dim * bytesPerDim + 
commonPrefix; - final int dimCmpBytes = bytesPerDim - commonPrefix; - new RadixSelector(bytesSorted - commonPrefix) { + private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefixLength) { + final int dimOffset = dim * bytesPerDim + commonPrefixLength; + final int dimCmpBytes = bytesPerDim - commonPrefixLength; + final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes; + new RadixSelector(bytesSorted - commonPrefixLength) { @Override protected void swap(int i, int j) { @@ -324,23 +337,23 @@ public final class BKDRadixSelector { assert k >= 0 : "negative prefix " + k; if (k < dimCmpBytes) { // dim bytes - return points.block[i * packedBytesLength + offset + k] & 0xff; + return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff; } else { - // doc id - int s = 3 - (k - dimCmpBytes); - return (points.docIDs[i] >>> (s * 8)) & 0xff; + // data bytes + return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff; } } @Override protected Selector getFallbackSelector(int d) { - int skypedBytes = d + commonPrefix; - final int start = dim * bytesPerDim + skypedBytes; - final int end = dim * bytesPerDim + bytesPerDim; + final int skypedBytes = d + commonPrefixLength; + final int dimStart = dim * bytesPerDim + skypedBytes; + final int dimEnd = dim * bytesPerDim + bytesPerDim; + final int dataOffset = numIndexDims * bytesPerDim; + // data length is composed by the data dimensions plus the docID + final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES; return new IntroSelector() { - int pivotDoc = -1; - @Override protected void swap(int i, int j) { points.swap(i, j); @@ -349,36 +362,37 @@ public final class BKDRadixSelector { @Override protected void setPivot(int i) { if (skypedBytes < bytesPerDim) { - System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim); + System.arraycopy(points.block, i * packedBytesDocIDLength + dim 
* bytesPerDim, scratch, 0, bytesPerDim); } - pivotDoc = points.docIDs[i]; + System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength); } @Override protected int compare(int i, int j) { if (skypedBytes < bytesPerDim) { - int iOffset = i * packedBytesLength; - int jOffset = j * packedBytesLength; - int cmp = Arrays.compareUnsigned(points.block, iOffset + start, iOffset + end, - points.block, jOffset + start, jOffset + end); + int iOffset = i * packedBytesDocIDLength; + int jOffset = j * packedBytesDocIDLength; + int cmp = Arrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd); if (cmp != 0) { return cmp; } } - return points.docIDs[i] - points.docIDs[j]; + int iOffset = i * packedBytesDocIDLength + dataOffset; + int jOffset = j * packedBytesDocIDLength + dataOffset; + return Arrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength); } @Override protected int comparePivot(int j) { if (skypedBytes < bytesPerDim) { - int jOffset = j * packedBytesLength; - int cmp = Arrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, - points.block, jOffset + start, jOffset + end); + int jOffset = j * packedBytesDocIDLength; + int cmp = Arrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd); if (cmp != 0) { return cmp; } } - return pivotDoc - points.docIDs[j]; + int jOffset = j * packedBytesDocIDLength + dataOffset; + return Arrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength); } }; } @@ -393,8 +407,9 @@ public final class BKDRadixSelector { /** Sort the heap writer by the specified dim. 
It is used to sort the leaves of the tree */ public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) { - final int offset = dim * bytesPerDim + commonPrefixLength; + final int dimOffset = dim * bytesPerDim + commonPrefixLength; final int dimCmpBytes = bytesPerDim - commonPrefixLength; + final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes; new MSBRadixSorter(bytesSorted - commonPrefixLength) { @Override @@ -402,11 +417,10 @@ public final class BKDRadixSelector { assert k >= 0 : "negative prefix " + k; if (k < dimCmpBytes) { // dim bytes - return points.block[i * packedBytesLength + offset + k] & 0xff; + return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff; } else { - // doc id - int s = 3 - (k - dimCmpBytes); - return (points.docIDs[i] >>> (s * 8)) & 0xff; + // data bytes + return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff; } } @@ -417,13 +431,14 @@ public final class BKDRadixSelector { @Override protected Sorter getFallbackSorter(int k) { - int skypedBytes = k + commonPrefixLength; - final int start = dim * bytesPerDim + skypedBytes; - final int end = dim * bytesPerDim + bytesPerDim; + final int skypedBytes = k + commonPrefixLength; + final int dimStart = dim * bytesPerDim + skypedBytes; + final int dimEnd = dim * bytesPerDim + bytesPerDim; + final int dataOffset = numIndexDims * bytesPerDim; + // data length is composed by the data dimensions plus the docID + final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES; return new IntroSorter() { - int pivotDoc = -1; - @Override protected void swap(int i, int j) { points.swap(i, j); @@ -432,36 +447,37 @@ public final class BKDRadixSelector { @Override protected void setPivot(int i) { if (skypedBytes < bytesPerDim) { - System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim); + System.arraycopy(points.block, i * packedBytesDocIDLength + dim * 
bytesPerDim, scratch, 0, bytesPerDim); } - pivotDoc = points.docIDs[i]; + System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength); } @Override protected int compare(int i, int j) { if (skypedBytes < bytesPerDim) { - int iOffset = i * packedBytesLength; - int jOffset = j * packedBytesLength; - int cmp = Arrays.compareUnsigned(points.block, iOffset + start, iOffset + end, - points.block, jOffset + start, jOffset + end); + int iOffset = i * packedBytesDocIDLength; + int jOffset = j * packedBytesDocIDLength; + int cmp = Arrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd); if (cmp != 0) { return cmp; } } - return points.docIDs[i] - points.docIDs[j]; + int iOffset = i * packedBytesDocIDLength + dataOffset; + int jOffset = j * packedBytesDocIDLength + dataOffset; + return Arrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength); } @Override protected int comparePivot(int j) { if (skypedBytes < bytesPerDim) { - int jOffset = j * packedBytesLength; - int cmp = Arrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, - points.block, jOffset + start, jOffset + end); + int jOffset = j * packedBytesDocIDLength; + int cmp = Arrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd); if (cmp != 0) { return cmp; } } - return pivotDoc - points.docIDs[j]; + int jOffset = j * packedBytesDocIDLength + dataOffset; + return Arrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength); } }; } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index ae7b362fcbd..650d4af82e0 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java 
@@ -774,7 +774,7 @@ public class BKDWriter implements Closeable { assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode; //We re-use the selector so we do not need to create an object every time. - BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix); + BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix); boolean success = false; try { @@ -785,7 +785,8 @@ public class BKDWriter implements Closeable { minPackedValue, maxPackedValue, parentSplits, splitPackedValues, - leafBlockFPs); + leafBlockFPs, + new int[maxPointsInLeafNode]); assert Arrays.equals(parentSplits, new int[numIndexDims]); // If no exception, we should have cleaned everything up: @@ -1361,7 +1362,7 @@ public class BKDWriter implements Closeable { } // sort by sortedDim - MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, + MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, from, to, scratchBytesRef1, scratchBytesRef2); BytesRef comparator = scratchBytesRef1; @@ -1428,7 +1429,7 @@ public class BKDWriter implements Closeable { commonPrefixLen = bytesPerDim; } - MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, + MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen, reader, from, to, mid, scratchBytesRef1, scratchBytesRef2); // set the split value @@ -1465,7 +1466,8 @@ public class BKDWriter implements Closeable { byte[] minPackedValue, byte[] maxPackedValue, int[] parentSplits, byte[] splitPackedValues, - long[] leafBlockFPs) throws IOException { + long[] leafBlockFPs, + int[] spareDocIds) throws IOException { if (nodeID >= leafNodeOffset) { @@ -1525,7 
+1527,13 @@ public class BKDWriter implements Closeable { // loading the values: int count = to - from; assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset; - writeLeafBlockDocs(out, heapSource.docIDs, from, count); + assert count <= spareDocIds.length : "count=" + count + " > length=" + spareDocIds.length; + // Write doc IDs + int[] docIDs = spareDocIds; + for (int i = 0; i < count; i++) { + docIDs[i] = heapSource.getPackedValueSlice(from + i).docID(); + } + writeLeafBlockDocs(out, docIDs, 0, count); // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us // from the index, much like how terms dict does so from the FST: @@ -1548,7 +1556,7 @@ public class BKDWriter implements Closeable { } }; assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, - heapSource.docIDs, from); + docIDs, 0); writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality); } else { @@ -1595,12 +1603,12 @@ public class BKDWriter implements Closeable { // Recurse on left tree: build(2 * nodeID, leafNodeOffset, slices[0], out, radixSelector, minPackedValue, maxSplitPackedValue, - parentSplits, splitPackedValues, leafBlockFPs); + parentSplits, splitPackedValues, leafBlockFPs, spareDocIds); // Recurse on right tree: build(2 * nodeID + 1, leafNodeOffset, slices[1], out, radixSelector, minSplitPackedValue, maxPackedValue - , parentSplits, splitPackedValues, leafBlockFPs); + , parentSplits, splitPackedValues, leafBlockFPs, spareDocIds); parentSplits[splitDim]--; } @@ -1654,6 +1662,13 @@ public class BKDWriter implements Closeable { if (cmp > 0) { throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord); } + if (cmp == 0 && numDataDims > numIndexDims) { + int dataOffset = numIndexDims 
* bytesPerDim; + cmp = Arrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength); + if (cmp > 0) { + throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord); + } + } if (cmp == 0 && doc < lastDoc) { throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord); } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java index 735b3138b21..4cee1ef036d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java @@ -27,16 +27,16 @@ public final class HeapPointReader implements PointReader { private int curRead; final byte[] block; final int packedBytesLength; - final int[] docIDs; + final int packedBytesDocIDLength; final int end; private final HeapPointValue pointValue; - public HeapPointReader(byte[] block, int packedBytesLength, int[] docIDs, int start, int end) { + public HeapPointReader(byte[] block, int packedBytesLength, int start, int end) { this.block = block; - this.docIDs = docIDs; curRead = start-1; this.end = end; this.packedBytesLength = packedBytesLength; + this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES; if (start < end) { this.pointValue = new HeapPointValue(block, packedBytesLength); } else { @@ -53,7 +53,7 @@ public final class HeapPointReader implements PointReader { @Override public PointValue pointValue() { - pointValue.setValue(curRead * packedBytesLength, docIDs[curRead]); + pointValue.setOffset(curRead * packedBytesDocIDLength); return pointValue; } @@ -66,21 +66,22 @@ public final class HeapPointReader implements PointReader { */ static class HeapPointValue 
implements PointValue { - BytesRef packedValue; - BytesRef docIDBytes; - int docID; + final BytesRef packedValue; + final BytesRef packedValueDocID; + final int packedValueLength; - public HeapPointValue(byte[] value, int packedLength) { - packedValue = new BytesRef(value, 0, packedLength); - docIDBytes = new BytesRef(new byte[4]); + HeapPointValue(byte[] value, int packedValueLength) { + this.packedValueLength = packedValueLength; + this.packedValue = new BytesRef(value, 0, packedValueLength); + this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES); } /** - * Sets a new value by changing the offset and docID. + * Sets a new value by changing the offset. */ - public void setValue(int offset, int docID) { - this.docID = docID; + public void setOffset(int offset) { packedValue.offset = offset; + packedValueDocID.offset = offset; } @Override @@ -90,16 +91,14 @@ public final class HeapPointReader implements PointReader { @Override public int docID() { - return docID; + int position = packedValueDocID.offset + packedValueLength; + return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16) + | ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF); } @Override - public BytesRef docIDBytes() { - docIDBytes.bytes[0] = (byte) (docID >> 24); - docIDBytes.bytes[1] = (byte) (docID >> 16); - docIDBytes.bytes[2] = (byte) (docID >> 8); - docIDBytes.bytes[3] = (byte) (docID >> 0); - return docIDBytes; + public BytesRef packedValueDocIDBytes() { + return packedValueDocID; } } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java index 306ec5c46ee..695d7e1f2bd 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java @@ -26,10 +26,10 @@ import 
org.apache.lucene.util.BytesRef; * @lucene.internal * */ public final class HeapPointWriter implements PointWriter { - public final int[] docIDs; public final byte[] block; final int size; final int packedBytesLength; + final int packedBytesDocIDLength; private final byte[] scratch; private int nextWrite; private boolean closed; @@ -38,11 +38,11 @@ public final class HeapPointWriter implements PointWriter { public HeapPointWriter(int size, int packedBytesLength) { - this.docIDs = new int[size]; - this.block = new byte[packedBytesLength * size]; - this.size = size; + this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES; this.packedBytesLength = packedBytesLength; - this.scratch = new byte[packedBytesLength]; + this.block = new byte[packedBytesDocIDLength * size]; + this.size = size; + this.scratch = new byte[packedBytesDocIDLength]; if (size > 0) { pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength); } else { @@ -54,7 +54,7 @@ public final class HeapPointWriter implements PointWriter { /** Returns a reference, in result, to the byte[] slice holding this value */ public PointValue getPackedValueSlice(int index) { assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index; - pointValue.setValue(index * packedBytesLength, docIDs[index]); + pointValue.setOffset(index * packedBytesDocIDLength); return pointValue; } @@ -63,8 +63,12 @@ public final class HeapPointWriter implements PointWriter { assert closed == false : "point writer is already closed"; assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]"; assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size; - System.arraycopy(packedValue, 0, block, nextWrite * packedBytesLength, packedBytesLength); - docIDs[nextWrite] = docID; + System.arraycopy(packedValue, 0, block, nextWrite * packedBytesDocIDLength, packedBytesLength); + int position = nextWrite * 
packedBytesDocIDLength + packedBytesLength; + block[position] = (byte) (docID >> 24); + block[++position] = (byte) (docID >> 16); + block[++position] = (byte) (docID >> 8); + block[++position] = (byte) (docID >> 0); nextWrite++; } @@ -72,27 +76,23 @@ public final class HeapPointWriter implements PointWriter { public void append(PointValue pointValue) { assert closed == false : "point writer is already closed"; assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size; - BytesRef packedValue = pointValue.packedValue(); - assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + (packedBytesLength) + "] but was [" + packedValue.length + "]"; - System.arraycopy(packedValue.bytes, packedValue.offset, block, nextWrite * packedBytesLength, packedBytesLength); - docIDs[nextWrite] = pointValue.docID(); + BytesRef packedValueDocID = pointValue.packedValueDocIDBytes(); + assert packedValueDocID.length == packedBytesDocIDLength : "[packedValue] must have length [" + (packedBytesDocIDLength) + "] but was [" + packedValueDocID.length + "]"; + System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset, block, nextWrite * packedBytesDocIDLength, packedBytesDocIDLength); nextWrite++; } public void swap(int i, int j) { - int docID = docIDs[i]; - docIDs[i] = docIDs[j]; - docIDs[j] = docID; - int indexI = i * packedBytesLength; - int indexJ = j * packedBytesLength; + int indexI = i * packedBytesDocIDLength; + int indexJ = j * packedBytesDocIDLength; // scratch1 = values[i] - System.arraycopy(block, indexI, scratch, 0, packedBytesLength); + System.arraycopy(block, indexI, scratch, 0, packedBytesDocIDLength); // values[i] = values[j] - System.arraycopy(block, indexJ, block, indexI, packedBytesLength); + System.arraycopy(block, indexJ, block, indexI, packedBytesDocIDLength); // values[j] = scratch1 - System.arraycopy(scratch, 0, block, indexJ, packedBytesLength); + System.arraycopy(scratch, 0, block, indexJ, packedBytesDocIDLength); 
} public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) { @@ -102,8 +102,8 @@ public final class HeapPointWriter implements PointWriter { for (int dim = 0; dim < numDataDims; dim++) { final int start = dim * bytesPerDim + commonPrefixLengths[dim]; final int end = dim * bytesPerDim + bytesPerDim; - if (Arrays.mismatch(block, i * packedBytesLength + start, i * packedBytesLength + end, - block, (i - 1) * packedBytesLength + start, (i - 1) * packedBytesLength + end) != -1) { + if (Arrays.mismatch(block, i * packedBytesDocIDLength + start, i * packedBytesDocIDLength + end, + block, (i - 1) * packedBytesDocIDLength + start, (i - 1) * packedBytesDocIDLength + end) != -1) { leafCardinality++; break; } @@ -120,9 +120,9 @@ public final class HeapPointWriter implements PointWriter { @Override public PointReader getReader(long start, long length) { assert closed : "point writer is still open and trying to get a reader"; - assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length; + assert start + length <= size: "start=" + start + " length=" + length + " docIDs.length=" + size; assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite; - return new HeapPointReader(block, packedBytesLength, docIDs, (int) start, Math.toIntExact(start+length)); + return new HeapPointReader(block, packedBytesLength, (int) start, Math.toIntExact(start+length)); } @Override @@ -136,6 +136,6 @@ public final class HeapPointWriter implements PointWriter { @Override public String toString() { - return "HeapPointWriter(count=" + nextWrite + " size=" + docIDs.length + ")"; + return "HeapPointWriter(count=" + nextWrite + " size=" + size + ")"; } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java index 5323476267d..15288590cce 100644 
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java @@ -78,7 +78,8 @@ public final class MutablePointsReaderUtils { protected int comparePivot(int j) { if (k < packedBytesLength) { reader.getValue(j, scratch); - int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k, scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k); + int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k, + scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k); if (cmp != 0) { return cmp; } @@ -92,14 +93,16 @@ public final class MutablePointsReaderUtils { } /** Sort points on the given dimension. */ - public static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths, + public static void sortByDim(int numDataDim, int numIndexDim, int sortedDim, int bytesPerDim, int[] commonPrefixLengths, MutablePointValues reader, int from, int to, BytesRef scratch1, BytesRef scratch2) { + final int start = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim]; + final int dimEnd = sortedDim * bytesPerDim + bytesPerDim; + final int dataStart = numIndexDim * bytesPerDim; + final int dataEnd = dataStart + (numDataDim - numIndexDim) * bytesPerDim; // No need for a fancy radix sort here, this is called on the leaves only so // there are not many values to sort - final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim]; - final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim]; new IntroSorter() { final BytesRef pivot = scratch1; @@ -119,9 +122,14 @@ public final class MutablePointsReaderUtils { @Override protected int comparePivot(int j) { reader.getValue(j, scratch2); - int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + offset, pivot.offset + offset + numBytesToCompare, 
scratch2.bytes, scratch2.offset + offset, scratch2.offset + offset + numBytesToCompare); + int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + start, pivot.offset + dimEnd, scratch2.bytes, + scratch2.offset + start, scratch2.offset + dimEnd); if (cmp == 0) { - cmp = pivotDoc - reader.getDocID(j); + cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd, + scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd); + if (cmp == 0) { + cmp = pivotDoc - reader.getDocID(j); + } } return cmp; } @@ -131,16 +139,20 @@ public final class MutablePointsReaderUtils { /** Partition points around {@code mid}. All values on the left must be less * than or equal to it and all values on the right must be greater than or * equal to it. */ - public static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen, + public static void partition(int numDataDim, int numIndexDim, int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen, MutablePointValues reader, int from, int to, int mid, BytesRef scratch1, BytesRef scratch2) { - final int offset = splitDim * bytesPerDim + commonPrefixLen; - final int cmpBytes = bytesPerDim - commonPrefixLen; + final int dimOffset = splitDim * bytesPerDim + commonPrefixLen; + final int dimCmpBytes = bytesPerDim - commonPrefixLen; + final int dataOffset = numIndexDim * bytesPerDim; + final int dataCmpBytes = (numDataDim - numIndexDim) * bytesPerDim + dimCmpBytes; final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1); - new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) { + new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) { @Override protected Selector getFallbackSelector(int k) { + final int dataStart = (k < dimCmpBytes) ? 
dataOffset : dataOffset + k - dimCmpBytes; + final int dataEnd = numDataDim * bytesPerDim; return new IntroSelector() { final BytesRef pivot = scratch1; @@ -159,9 +171,18 @@ public final class MutablePointsReaderUtils { @Override protected int comparePivot(int j) { - if (k < cmpBytes) { + if (k < dimCmpBytes) { reader.getValue(j, scratch2); - int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + offset + k, pivot.offset + offset + k + cmpBytes - k, scratch2.bytes, scratch2.offset + offset + k, scratch2.offset + offset + k + cmpBytes - k); + int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + dimOffset + k, pivot.offset + dimOffset + dimCmpBytes, + scratch2.bytes, scratch2.offset + dimOffset + k, scratch2.offset + dimOffset + dimCmpBytes); + if (cmp != 0) { + return cmp; + } + } + if (k < dataCmpBytes) { + reader.getValue(j, scratch2); + int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd, + scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd); if (cmp != 0) { return cmp; } @@ -178,10 +199,12 @@ public final class MutablePointsReaderUtils { @Override protected int byteAt(int i, int k) { - if (k < cmpBytes) { - return Byte.toUnsignedInt(reader.getByteAt(i, offset + k)); + if (k < dimCmpBytes) { + return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k)); + } else if (k < dataCmpBytes) { + return Byte.toUnsignedInt(reader.getByteAt(i, dataOffset + k - dimCmpBytes)); } else { - final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3); + final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3); return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff; } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java index 6218429b071..030bd719fea 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java +++ 
b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java @@ -137,12 +137,14 @@ public final class OfflinePointReader implements PointReader { */ static class OfflinePointValue implements PointValue { - BytesRef packedValue; - BytesRef docIDBytes; + final BytesRef packedValue; + final BytesRef packedValueDocID; + final int packedValueLength; OfflinePointValue(byte[] value, int packedValueLength) { - packedValue = new BytesRef(value, 0, packedValueLength); - docIDBytes = new BytesRef(value, packedValueLength, Integer.BYTES); + this.packedValueLength = packedValueLength; + this.packedValue = new BytesRef(value, 0, packedValueLength); + this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES); } /** @@ -150,7 +152,7 @@ public final class OfflinePointReader implements PointReader { */ public void setOffset(int offset) { packedValue.offset = offset; - docIDBytes.offset = offset + packedValue.length; + packedValueDocID.offset = offset; } @Override @@ -160,14 +162,14 @@ public final class OfflinePointReader implements PointReader { @Override public int docID() { - int position =docIDBytes.offset; - return ((docIDBytes.bytes[position] & 0xFF) << 24) | ((docIDBytes.bytes[++position] & 0xFF) << 16) - | ((docIDBytes.bytes[++position] & 0xFF) << 8) | (docIDBytes.bytes[++position] & 0xFF); + int position = packedValueDocID.offset + packedValueLength; + return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16) + | ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF); } @Override - public BytesRef docIDBytes() { - return docIDBytes; + public BytesRef packedValueDocIDBytes() { + return packedValueDocID; } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java index 4f4ce9ee740..d9fa86f5307 100644 --- 
a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java @@ -62,12 +62,9 @@ public final class OfflinePointWriter implements PointWriter { @Override public void append(PointValue pointValue) throws IOException { assert closed == false : "Point writer is already closed"; - BytesRef packedValue = pointValue.packedValue(); - assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]"; - out.writeBytes(packedValue.bytes, packedValue.offset, packedValue.length); - BytesRef docIDBytes = pointValue.docIDBytes(); - assert docIDBytes.length == Integer.BYTES : "[docIDBytes] must have length [" + Integer.BYTES + "] but was [" + docIDBytes.length + "]"; - out.writeBytes(docIDBytes.bytes, docIDBytes.offset, docIDBytes.length); + BytesRef packedValueDocID = pointValue.packedValueDocIDBytes(); + assert packedValueDocID.length == packedBytesLength + Integer.BYTES : "[packedValue and docID] must have length [" + (packedBytesLength + Integer.BYTES) + "] but was [" + packedValueDocID.length + "]"; + out.writeBytes(packedValueDocID.bytes, packedValueDocID.offset, packedValueDocID.length); count++; assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count; } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java index 28506278782..d933e81ff61 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java @@ -28,10 +28,11 @@ public interface PointValue { /** Returns the packed values for the dimensions */ BytesRef packedValue(); - /** Returns the document id */ + /** Returns the docID */ int docID(); - /** Returns the byte representation of the document id */ - BytesRef docIDBytes(); + 
/** Returns the byte representation of the packed value + * together with the docID */ + BytesRef packedValueDocIDBytes(); } diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java index 338e459ad3d..fd80be663bd 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java @@ -1097,6 +1097,80 @@ public class TestBKD extends LuceneTestCase { } } + public void testCheckDataDimOptimalOrder() throws IOException { + Directory dir = newDirectory(); + final int numValues = atLeast(5000); + final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500); + final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4); + final double maxMB = (float) 3.0 + (3*random().nextDouble()); + + final int numIndexDims = TestUtil.nextInt(random(), 1, 8); + final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8); + + final byte[] pointValue1 = new byte[numDataDims * numBytesPerDim]; + final byte[] pointValue2 = new byte[numDataDims * numBytesPerDim]; + random().nextBytes(pointValue1); + random().nextBytes(pointValue2); + // equal index dimensions but different data dimensions + for (int i = 0; i < numIndexDims; i++) { + System.arraycopy(pointValue1, i * numBytesPerDim, pointValue2, i * numBytesPerDim, numBytesPerDim); + } + + BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode, + maxMB, 2 * numValues); + for (int i = 0; i < numValues; ++i) { + w.add(pointValue1, i); + w.add(pointValue2, i); + } + final long indexFP; + try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) { + indexFP = w.finish(out); + w.close(); + } + + IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT); + pointsIn.seek(indexFP); + BKDReader points = new BKDReader(pointsIn); + + points.intersect(new IntersectVisitor() { + + byte[] previous = null; + boolean 
hasChanged = false; + + @Override + public void visit(int docID) { + throw new UnsupportedOperationException(); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (previous == null) { + previous = new byte[numDataDims * numBytesPerDim]; + System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim); + } else { + int mismatch = Arrays.mismatch(packedValue, previous); + if (mismatch != -1) { + if (hasChanged == false) { + hasChanged = true; + System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim); + } else { + fail("Points are not in optimal order"); + } + } + } + } + + @Override + public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + return Relation.CELL_CROSSES_QUERY; + } + }); + + + pointsIn.close(); + dir.close(); + } + public void test2DLongOrdsOffline() throws Exception { try (Directory dir = newDirectory()) { int numDocs = 100000; diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java index 075cd1cd9d8..898643d3ca1 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java @@ -208,7 +208,7 @@ public class TestBKDRadixSelector extends LuceneTestCase { } private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{ - BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDimensions, sortedOnHeap, dir, "test"); + BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDimensions, sortedOnHeap, dir, "test"); //we only split by indexed dimension so we check for each only those dimension for (int splitDim = 0; splitDim < indexDimensions; splitDim++) { //We need to 
make a copy of the data as it is deleted in the process @@ -224,9 +224,15 @@ public class TestBKDRadixSelector extends LuceneTestCase { int cmp = Arrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions); assertTrue(cmp <= 0); if (cmp == 0) { - int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint); - int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint); - assertTrue(minDocId >= maxDocID); + byte[] maxDataDim = getMaxDataDimension(slices[0], bytesPerDimensions, dataDimensions, indexDimensions, max, splitDim); + byte[] minDataDim = getMinDataDimension(slices[1], bytesPerDimensions, dataDimensions, indexDimensions, min, splitDim); + cmp = Arrays.compareUnsigned(maxDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions, minDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions); + assertTrue(cmp <= 0); + if (cmp == 0) { + int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,maxDataDim); + int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,minDataDim); + assertTrue(minDocId >= maxDocID); + } } assertTrue(Arrays.equals(partitionPoint, min)); slices[0].writer.destroy(); @@ -292,14 +298,17 @@ public class TestBKDRadixSelector extends LuceneTestCase { return min; } - private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException { + private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException { int docID = Integer.MAX_VALUE; try (PointReader reader = p.writer.getReader(p.start, p.count)) { while (reader.next()) { PointValue pointValue = reader.pointValue(); BytesRef packedValue = pointValue.packedValue(); int offset = dimension * bytesPerDimension; - if 
(Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) { + int dataOffset = indexDims * bytesPerDimension; + int dataLength = (dataDims - indexDims) * bytesPerDimension; + if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0 + && Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) { int newDocID = pointValue.docID(); if (newDocID < docID) { docID = newDocID; @@ -310,6 +319,26 @@ public class TestBKDRadixSelector extends LuceneTestCase { return docID; } + private byte[] getMinDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] minDim, int splitDim) throws IOException { + byte[] min = new byte[(dataDims - indexDims) * bytesPerDimension]; + Arrays.fill(min, (byte) 0xff); + int offset = splitDim * bytesPerDimension; + try (PointReader reader = p.writer.getReader(p.start, p.count)) { + byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension]; + while (reader.next()) { + PointValue pointValue = reader.pointValue(); + BytesRef packedValue = pointValue.packedValue(); + if (Arrays.mismatch(minDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) { + System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension); + if (Arrays.compareUnsigned(min, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) > 0) { + System.arraycopy(value, 0, min, 0, (dataDims - indexDims) * bytesPerDimension); + } + } + } + } + return min; + } + private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, 
int dimension) throws IOException { byte[] max = new byte[bytesPerDimension]; Arrays.fill(max, (byte) 0); @@ -327,14 +356,37 @@ public class TestBKDRadixSelector extends LuceneTestCase { return max; } - private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException { + private byte[] getMaxDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] maxDim, int splitDim) throws IOException { + byte[] max = new byte[(dataDims - indexDims) * bytesPerDimension]; + Arrays.fill(max, (byte) 0); + int offset = splitDim * bytesPerDimension; + try (PointReader reader = p.writer.getReader(p.start, p.count)) { + byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension]; + while (reader.next()) { + PointValue pointValue = reader.pointValue(); + BytesRef packedValue = pointValue.packedValue(); + if (Arrays.mismatch(maxDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) { + System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension); + if (Arrays.compareUnsigned(max, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) < 0) { + System.arraycopy(value, 0, max, 0, (dataDims - indexDims) * bytesPerDimension); + } + } + } + } + return max; + } + + private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException { int docID = Integer.MIN_VALUE; try (PointReader reader = p.writer.getReader(p.start, p.count)) { while (reader.next()) { PointValue pointValue = reader.pointValue(); BytesRef packedValue = pointValue.packedValue(); int offset = dimension * bytesPerDimension; - if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, 
packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) { + int dataOffset = indexDims * bytesPerDimension; + int dataLength = (dataDims - indexDims) * bytesPerDimension; + if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0 + && Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) { int newDocID = pointValue.docID(); if (newDocID > docID) { docID = newDocID; diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java index a7cebd024af..1b0b8922002 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java @@ -116,7 +116,7 @@ public class TestBKDRadixSort extends LuceneTestCase { private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{ int packedBytesLength = dataDimensions * bytesPerDim; Directory dir = newDirectory(); - BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDim, 1000, dir, "test"); + BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDim, 1000, dir, "test"); // we check for each dimension for (int splitDim = 0; splitDim < dataDimensions; splitDim++) { radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim)); @@ -129,6 +129,11 @@ public class TestBKDRadixSort extends LuceneTestCase { BytesRef value = pointValue.packedValue(); int cmp = Arrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim); 
assertTrue(cmp >= 0); + if (cmp == 0) { + int dataOffset = indexDimensions * bytesPerDim; + cmp = Arrays.compareUnsigned(value.bytes, value.offset + dataOffset, value.offset + packedBytesLength, previous, dataOffset, packedBytesLength); + assertTrue(cmp >= 0); + } if (cmp == 0) { assertTrue(pointValue.docID() >= previousDocId); } diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java index 6f6bf0240cf..e17eab1f9a2 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java @@ -37,7 +37,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase { private void doTestSort() { final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); - Point[] points = createRandomPoints(1, bytesPerDim, maxDoc); + Point[] points = createRandomPoints(1, 1, bytesPerDim, maxDoc, new int[1]); DummyPointsReader reader = new DummyPointsReader(points); MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length); Arrays.sort(points, new Comparator() { @@ -61,25 +61,15 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase { } private void doTestSortByDim() { - final int numDims = TestUtil.nextInt(random(), 1, 8); + final int numIndexDims = TestUtil.nextInt(random(), 1, 8); + final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8); final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); - Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc); - int[] commonPrefixLengths = new int[numDims]; - for (int i = 0; i < commonPrefixLengths.length; ++i) { - commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim); - } - BytesRef 
firstValue = points[0].packedValue; - for (int i = 1; i < points.length; ++i) { - for (int dim = 0; dim < numDims; ++dim) { - int offset = dim * bytesPerDim; - BytesRef packedValue = points[i].packedValue; - System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]); - } - } + int[] commonPrefixLengths = new int[numDataDims]; + Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths); DummyPointsReader reader = new DummyPointsReader(points); - final int sortedDim = random().nextInt(numDims); - MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length, + final int sortedDim = random().nextInt(numIndexDims); + MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length, new BytesRef(), new BytesRef()); for (int i = 1; i < points.length; ++i) { final int offset = sortedDim * bytesPerDim; @@ -87,7 +77,13 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase { BytesRef currentValue = reader.points[i].packedValue; int cmp = Arrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim); if (cmp == 0) { - cmp = reader.points[i - 1].doc - reader.points[i].doc; + int dataDimOffset = numIndexDims * bytesPerDim; + int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim; + cmp = Arrays.compareUnsigned(previousValue.bytes, previousValue.offset + dataDimOffset, previousValue.offset + dataDimOffset + dataDimsLength, + currentValue.bytes, currentValue.offset + dataDimOffset, currentValue.offset + dataDimOffset + dataDimsLength); + if (cmp == 0) { + cmp = reader.points[i - 1].doc - reader.points[i].doc; + } } assertTrue(cmp <= 0); } @@ -100,29 +96,31 @@ public class 
TestMutablePointsReaderUtils extends LuceneTestCase { } private void doTestPartition() { - final int numDims = TestUtil.nextInt(random(), 1, 8); + final int numIndexDims = TestUtil.nextInt(random(), 1, 8); + final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8); final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); + int[] commonPrefixLengths = new int[numDataDims]; final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); - Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc); - int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim); - final int splitDim = random().nextInt(numDims); - BytesRef firstValue = points[0].packedValue; - for (int i = 1; i < points.length; ++i) { - BytesRef packedValue = points[i].packedValue; - int offset = splitDim * bytesPerDim; - System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength); - } + Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths); + final int splitDim = random().nextInt(numIndexDims); DummyPointsReader reader = new DummyPointsReader(points); final int pivot = TestUtil.nextInt(random(), 0, points.length - 1); - MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot, + MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLengths[splitDim], reader, 0, points.length, pivot, new BytesRef(), new BytesRef()); BytesRef pivotValue = reader.points[pivot].packedValue; int offset = splitDim * bytesPerDim; for (int i = 0; i < points.length; ++i) { BytesRef value = reader.points[i].packedValue; - int cmp = Arrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim, pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim); + int cmp = Arrays.compareUnsigned(value.bytes, 
value.offset + offset, value.offset + offset + bytesPerDim, + pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim); if (cmp == 0) { - cmp = reader.points[i].doc - reader.points[pivot].doc; + int dataDimOffset = numIndexDims * bytesPerDim; + int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim; + cmp = Arrays.compareUnsigned(value.bytes, value.offset + dataDimOffset, value.offset + dataDimOffset + dataDimsLength, + pivotValue.bytes, pivotValue.offset + dataDimOffset, pivotValue.offset + dataDimOffset + dataDimsLength); + if (cmp == 0) { + cmp = reader.points[i].doc - reader.points[pivot].doc; + } } if (i < pivot) { assertTrue(cmp <= 0); @@ -134,14 +132,54 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase { } } - private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) { - final int packedBytesLength = numDims * bytesPerDim; + private static Point[] createRandomPoints(int numDataDims, int numIndexdims, int bytesPerDim, int maxDoc, int[] commonPrefixLengths) { + assertTrue(commonPrefixLengths.length == numDataDims); + final int packedBytesLength = numDataDims * bytesPerDim; final int numPoints = TestUtil.nextInt(random(), 1, 100000); Point[] points = new Point[numPoints]; - for (int i = 0; i < numPoints; ++i) { - byte[] value = new byte[packedBytesLength]; - random().nextBytes(value); - points[i] = new Point(value, random().nextInt(maxDoc)); + if (random().nextInt(5) != 0) { + for (int i = 0; i < numPoints; ++i) { + byte[] value = new byte[packedBytesLength]; + random().nextBytes(value); + points[i] = new Point(value, random().nextInt(maxDoc)); + } + for (int i = 0; i < numDataDims; ++i) { + commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim); + } + BytesRef firstValue = points[0].packedValue; + for (int i = 1; i < points.length; ++i) { + for (int dim = 0; dim < numDataDims; ++dim) { + int offset = dim * bytesPerDim; + BytesRef packedValue = 
points[i].packedValue; + System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]); + } + } + } else { + //index dim are equal, data dims different + byte[] indexDims = new byte[numIndexdims * bytesPerDim]; + random().nextBytes(indexDims); + byte[] dataDims = new byte[(numDataDims - numIndexdims) * bytesPerDim]; + for (int i = 0; i < numPoints; ++i) { + byte[] value = new byte[packedBytesLength]; + System.arraycopy(indexDims, 0, value, 0, numIndexdims * bytesPerDim); + random().nextBytes(dataDims); + System.arraycopy(dataDims, 0, value, numIndexdims * bytesPerDim, (numDataDims - numIndexdims) * bytesPerDim); + points[i] = new Point(value, random().nextInt(maxDoc)); + } + for (int i = 0; i < numIndexdims; ++i) { + commonPrefixLengths[i] = bytesPerDim; + } + for (int i = numIndexdims; i < numDataDims; ++i) { + commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim); + } + BytesRef firstValue = points[0].packedValue; + for (int i = 1; i < points.length; ++i) { + for (int dim = numIndexdims; dim < numDataDims; ++dim) { + int offset = dim * bytesPerDim; + BytesRef packedValue = points[i].packedValue; + System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]); + } + } } return points; }