LUCENE-6901: speed up dimensional values indexing and merging

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1716189 13f79535-47bb-0310-9956-ffa450edef68
2015-11-24 16:06:36 +00:00 · 2015-11-24 16:06:36 +00:00 · 50a1a78be5
parent ee91bffb99
commit 50a1a78be5
9 changed files with 807 additions and 136 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -89,6 +89,11 @@ Optimizations
  each leaf block in the default codec, to reduce the index
  size (Mike McCandless)
 * LUCENE-6901: Optimize dimensional values indexing: use faster
  IntroSorter instead of InPlaceMergeSorter, and specialize 1D
  merging to merge sort the already sorted segments instead of
  re-indexing (Mike McCandless)
 Changes in Runtime Behavior
 * LUCENE-6789: IndexSearcher's default Similarity is changed to BM25Similarity.
--- a/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/DimensionalWriter.java
@ -37,66 +37,73 @@ public abstract class DimensionalWriter implements Closeable {
  /** Write all values contained in the provided reader */
  public abstract void writeField(FieldInfo fieldInfo, DimensionalReader values) throws IOException;
  /** Default naive merge implemenation for one field: it just re-indexes all the values
   *  from the incoming segment.  The default codec overrides this for 1D fields and uses
   *  a faster but more complex implementation. */
  protected void mergeOneField(MergeState mergeState, FieldInfo fieldInfo) throws IOException {
    writeField(fieldInfo,
               new DimensionalReader() {
                 @Override
                 public void intersect(String fieldName, IntersectVisitor mergedVisitor) throws IOException {
                   if (fieldName.equals(fieldInfo.name) == false) {
                     throw new IllegalArgumentException("field name must match the field being merged");
                   }
                   for (int i=0;i<mergeState.dimensionalReaders.length;i++) {
                     DimensionalReader dimensionalReader = mergeState.dimensionalReaders[i];
                     if (dimensionalReader == null) {
                       // This segment has no dimensional values
                       continue;
                     }
                     MergeState.DocMap docMap = mergeState.docMaps[i];
                     int docBase = mergeState.docBase[i];
                     dimensionalReader.intersect(fieldInfo.name,
                                                 new IntersectVisitor() {
                                                   @Override
                                                   public void visit(int docID) {
                                                     // Should never be called because our compare method never returns Relation.CELL_INSIDE_QUERY
                                                     throw new IllegalStateException();
                                                   }
                                                   @Override
                                                   public void visit(int docID, byte[] packedValue) throws IOException {
                                                     int newDocID = docMap.get(docID);
                                                     if (newDocID != -1) {
                                                       // Not deleted:
                                                       mergedVisitor.visit(docBase + newDocID, packedValue);
                                                     }
                                                   }
                                                   @Override
                                                   public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                                                     // Forces this segment's DimensionalReader to always visit all docs + values:
                                                     return Relation.CELL_CROSSES_QUERY;
                                                   }
                                                 });
                   }
                 }
                 @Override
                 public void checkIntegrity() {
                   throw new UnsupportedOperationException();
                 }
                 @Override
                 public long ramBytesUsed() {
                   return 0L;
                 }
                 @Override
                 public void close() {
                 }
               });
  }
  /** Default merge implementation to merge incoming dimensional readers by visiting all their points and
   *  adding to this writer */
  public void merge(MergeState mergeState) throws IOException {
    for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
      if (fieldInfo.getDimensionCount() != 0) {
-        writeField(fieldInfo,
+        mergeOneField(mergeState, fieldInfo);
                   new DimensionalReader() {
                     @Override
                     public void intersect(String fieldName, IntersectVisitor mergedVisitor) throws IOException {
                       if (fieldName.equals(fieldInfo.name) == false) {
                         throw new IllegalArgumentException("field name must match the field being merged");
                       }
                       for (int i=0;i<mergeState.dimensionalReaders.length;i++) {
                         DimensionalReader dimensionalReader = mergeState.dimensionalReaders[i];
                         if (dimensionalReader == null) {
                           // This segment has no dimensional values
                           continue;
                         }
                         MergeState.DocMap docMap = mergeState.docMaps[i];
                         int docBase = mergeState.docBase[i];
                         dimensionalReader.intersect(fieldInfo.name,
                                                     new IntersectVisitor() {
                                                       @Override
                                                       public void visit(int docID) {
                                                         // Should never be called because our compare method never returns Relation.CELL_INSIDE_QUERY
                                                         throw new IllegalStateException();
                                                       }
                                                       @Override
                                                       public void visit(int docID, byte[] packedValue) throws IOException {
                                                         int newDocID = docMap.get(docID);
                                                         if (newDocID != -1) {
                                                           // Not deleted:
                                                           mergedVisitor.visit(docBase + newDocID, packedValue);
                                                         }
                                                       }
                                                       @Override
                                                       public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                                                         // Forces this segment's DimensionalReader to always visit all docs + values:
                                                         return Relation.CELL_CROSSES_QUERY;
                                                       }
                                                     });
                       }
                     }
                     @Override
                     public void checkIntegrity() {
                       throw new UnsupportedOperationException();
                     }
                     @Override
                     public long ramBytesUsed() {
                       return 0L;
                     }
                     @Override
                     public void close() {
                     }
                   });
      }
    }
  }
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalReader.java
@ -71,7 +71,9 @@ public class Lucene60DimensionalReader extends DimensionalReader implements Clos
        int fieldNumber = indexIn.readVInt();
        long fp = indexIn.readVLong();
        dataIn.seek(fp);
-        readers.put(fieldNumber, new BKDReader(dataIn));
+        BKDReader reader = new BKDReader(dataIn);
        readers.put(fieldNumber, reader);
        //reader.verify(readState.segmentInfo.maxDoc());
      }
      CodecUtil.checkFooter(indexIn);
      success = true;
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60DimensionalWriter.java
@ -19,7 +19,9 @@ package org.apache.lucene.codecs.lucene60;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
@ -28,10 +30,13 @@ import org.apache.lucene.codecs.DimensionalWriter;
 import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
 import org.apache.lucene.index.DimensionalValues.Relation;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
 import org.apache.lucene.util.bkd.BKDWriter;
 /** Writes dimensional values */
@ -104,6 +109,61 @@ public class Lucene60DimensionalWriter extends DimensionalWriter implements Clos
    }
  }
  @Override
  public void merge(MergeState mergeState) throws IOException {
    for(DimensionalReader reader : mergeState.dimensionalReaders) {
      if (reader instanceof Lucene60DimensionalReader == false) {
        // We can only bulk merge when all to-be-merged segments use our format:
        super.merge(mergeState);
        return;
      }
    }
    for (FieldInfo fieldInfo : mergeState.mergeFieldInfos) {
      if (fieldInfo.getDimensionCount() != 0) {
        if (fieldInfo.getDimensionCount() == 1) {
          //System.out.println("MERGE: field=" + fieldInfo.name);
          // Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
          // already sorted incoming segments, instead of trying to sort all points again as if
          // we were simply reindexing them:
          try (BKDWriter writer = new BKDWriter(writeState.directory,
                                                writeState.segmentInfo.name,
                                                fieldInfo.getDimensionCount(),
                                                fieldInfo.getDimensionNumBytes(),
                                                maxPointsInLeafNode,
                                                maxMBSortInHeap)) {
            List<BKDReader> bkdReaders = new ArrayList<>();
            List<MergeState.DocMap> docMaps = new ArrayList<>();
            List<Integer> docIDBases = new ArrayList<>();
            for(int i=0;i<mergeState.dimensionalReaders.length;i++) {
              DimensionalReader reader = mergeState.dimensionalReaders[i];
              Lucene60DimensionalReader reader60 = (Lucene60DimensionalReader) reader;
              if (reader60 != null) {
                // TODO: I could just use the merged fieldInfo.number instead of resolving to this
                // reader's FieldInfo, right?  Field numbers are always consistent across segments,
                // since when?
                FieldInfos readerFieldInfos = mergeState.fieldInfos[i];
                FieldInfo readerFieldInfo = readerFieldInfos.fieldInfo(fieldInfo.name);
                if (readerFieldInfo != null) {
                  BKDReader bkdReader = reader60.readers.get(readerFieldInfo.number);
                  if (bkdReader != null) {
                    docIDBases.add(mergeState.docBase[i]);
                    bkdReaders.add(bkdReader);
                    docMaps.add(mergeState.docMaps[i]);
                  }
                }
              }
            }
            indexFPs.put(fieldInfo.name, writer.merge(dataOut, docMaps, bkdReaders, docIDBases));
          }
        } else {
          mergeOneField(mergeState, fieldInfo);
        }
      }
    } 
  }  
  @Override
  public void close() throws IOException {
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
@ -415,6 +415,8 @@ class DocumentsWriterPerThread {
      return null;
    }
    long t0 = System.nanoTime();
    if (infoStream.isEnabled("DWPT")) {
      infoStream.message("DWPT", "flush postings as segment " + flushState.segmentInfo.name + " numDocs=" + numDocsInRAM);
    }
@ -458,6 +460,9 @@ class DocumentsWriterPerThread {
      FlushedSegment fs = new FlushedSegment(segmentInfoPerCommit, flushState.fieldInfos,
                                             segmentDeletes, flushState.liveDocs, flushState.delCountOnFlush);
      sealFlushedSegment(fs);
      if (infoStream.isEnabled("DWPT")) {
        infoStream.message("DWPT", "flush time " + ((System.nanoTime() - t0)/1000000.0) + " msec");
      }
      return fs;
    } catch (Throwable th) {
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@ -25,7 +25,9 @@ import org.apache.lucene.index.DimensionalValues.IntersectVisitor;
 import org.apache.lucene.index.DimensionalValues.Relation;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
 /** Handles intersection of an multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.
 *
@ -34,7 +36,7 @@ import org.apache.lucene.util.RamUsageEstimator;
 public class BKDReader implements Accountable {
  // Packed array of byte[] holding all split values in the full binary tree:
  final private byte[] splitPackedValues; 
-  final private long[] leafBlockFPs;
+  final long[] leafBlockFPs;
  final private int leafNodeOffset;
  final int numDims;
  final int bytesPerDim;
@ -55,10 +57,12 @@ public class BKDReader implements Accountable {
    leafNodeOffset = numLeaves;
    splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
    // TODO: don't write split packed values[0]!
    in.readBytes(splitPackedValues, 0, splitPackedValues.length);
-    // Tree is fully balanced binary tree, so number of nodes = numLeaves-1, except our nodeIDs are 1-based (splitPackedValues[0] is unused):
+    // Read the file pointers to the start of each leaf block:
-    leafBlockFPs = new long[numLeaves];
+    long[] leafBlockFPs = new long[numLeaves];
    long lastFP = 0;
    for(int i=0;i<numLeaves;i++) {
      long delta = in.readVLong();
@ -66,6 +70,47 @@ public class BKDReader implements Accountable {
      lastFP += delta;
    }
    // Possibly rotate the leaf block FPs, if the index not fully balanced binary tree (only happens
    // if it was created by BKDWriter.merge).  In this case the leaf nodes may straddle the two bottom
    // levels of the binary tree:
    if (numDims == 1 && numLeaves > 1) {
      //System.out.println("BKDR: numLeaves=" + numLeaves);
      int levelCount = 2;
      while (true) {
        //System.out.println("  cycle levelCount=" + levelCount);
        if (numLeaves >= levelCount && numLeaves <= 2*levelCount) {
          int lastLevel = 2*(numLeaves - levelCount);
          assert lastLevel >= 0;
          /*
          System.out.println("BKDR: lastLevel=" + lastLevel + " vs " + levelCount);
          System.out.println("FPs before:");
          for(int i=0;i<leafBlockFPs.length;i++) {
            System.out.println("  " + i + " " + leafBlockFPs[i]);
          }
          */
          if (lastLevel != 0) {
            // Last level is partially filled, so we must rotate the leaf FPs to match.  We do this here, after loading
            // at read-time, so that we can still delta code them on disk at write:
            //System.out.println("BKDR: now rotate index");
            long[] newLeafBlockFPs = new long[numLeaves];
            System.arraycopy(leafBlockFPs, lastLevel, newLeafBlockFPs, 0, leafBlockFPs.length - lastLevel);
            System.arraycopy(leafBlockFPs, 0, newLeafBlockFPs, leafBlockFPs.length - lastLevel, lastLevel);
            leafBlockFPs = newLeafBlockFPs;
          }
          /*
          System.out.println("FPs:");
          for(int i=0;i<leafBlockFPs.length;i++) {
            System.out.println("  " + i + " " + leafBlockFPs[i]);
          }
          */
          break;
        }
        levelCount *= 2;
      }
    }
    this.leafBlockFPs = leafBlockFPs;
    this.in = in;
  }
@ -81,7 +126,120 @@ public class BKDReader implements Accountable {
    this.splitPackedValues = splitPackedValues;
  }
-  private static final class IntersectState {
+  private static class VerifyVisitor implements IntersectVisitor {
    byte[] cellMinPacked;
    byte[] cellMaxPacked;
    byte[] lastPackedValue;
    final int numDims;
    final int bytesPerDim;
    final int maxDoc;
    public VerifyVisitor(int numDims, int bytesPerDim, int maxDoc) {
      this.numDims = numDims;
      this.bytesPerDim = bytesPerDim;
      this.maxDoc = maxDoc;
    }
    @Override
    public void visit(int docID) {
      throw new UnsupportedOperationException();
    }
    @Override
    public void visit(int docID, byte[] packedValue) {
      if (docID < 0 || docID >= maxDoc) {
        throw new RuntimeException("docID=" + docID + " is out of bounds of 0.." + maxDoc);
      }
      for(int dim=0;dim<numDims;dim++) {
        if (StringHelper.compare(bytesPerDim, cellMinPacked, dim*bytesPerDim, packedValue, dim*bytesPerDim) > 0) {
          throw new RuntimeException("value=" + new BytesRef(packedValue, dim*bytesPerDim, bytesPerDim) + " for docID=" + docID + " dim=" + dim + " is less than this leaf block's minimum=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim));
        }
        if (StringHelper.compare(bytesPerDim, cellMaxPacked, dim*bytesPerDim, packedValue, dim*bytesPerDim) < 0) {
          throw new RuntimeException("value=" + new BytesRef(packedValue, dim*bytesPerDim, bytesPerDim) + " for docID=" + docID + " dim=" + dim + " is greater than this leaf block's maximum=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim));
        }
      }
      if (numDims == 1) {
        // With only 1D, all values should always be in sorted order
        if (lastPackedValue == null) {
          lastPackedValue = Arrays.copyOf(packedValue, packedValue.length);
        } else if (BKDUtil.compare(bytesPerDim, lastPackedValue, 0, packedValue, 0) > 0) {
          throw new RuntimeException("value=" + new BytesRef(packedValue) + " for docID=" + docID + " dim=0" + " sorts before last value=" + new BytesRef(lastPackedValue));
        } else {
          System.arraycopy(packedValue, 0, lastPackedValue, 0, bytesPerDim);
        }
      }
    }
    @Override
    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
      throw new UnsupportedOperationException();
    }
  }
  /** Only used for debugging, to make sure all values in each leaf block fall within the range expected by the index */
  // TODO: maybe we can get this into CheckIndex?
  public void verify(int maxDoc) throws IOException {
    //System.out.println("BKDR.verify this=" + this);
    // Visits every doc in every leaf block and confirms that
    // their values agree with the index:
    byte[] rootMinPacked = new byte[packedBytesLength];
    byte[] rootMaxPacked = new byte[packedBytesLength];
    Arrays.fill(rootMaxPacked, (byte) 0xff);
    IntersectState state = new IntersectState(in.clone(), numDims, packedBytesLength,
                                              maxPointsInLeafNode,
                                              new VerifyVisitor(numDims, bytesPerDim, maxDoc));
    verify(state, 1, rootMinPacked, rootMaxPacked);
  }
  private void verify(IntersectState state, int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked) throws IOException {
    if (nodeID >= leafNodeOffset) {
      int leafID = nodeID - leafNodeOffset;
      // In the unbalanced case it's possible the left most node only has one child:
      if (leafID < leafBlockFPs.length) {
        //System.out.println("CHECK nodeID=" + nodeID + " leaf=" + (nodeID-leafNodeOffset) + " offset=" + leafNodeOffset + " fp=" + leafBlockFPs[leafID]);
        //System.out.println("BKDR.verify leafID=" + leafID + " nodeID=" + nodeID + " fp=" + leafBlockFPs[leafID] + " min=" + new BytesRef(cellMinPacked) + " max=" + new BytesRef(cellMaxPacked));
        // Leaf node: check that all values are in fact in bounds:
        VerifyVisitor visitor = (VerifyVisitor) state.visitor;
        visitor.cellMinPacked = cellMinPacked;
        visitor.cellMaxPacked = cellMaxPacked;
        int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
        visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
      } else {
        //System.out.println("BKDR.verify skip leafID=" + leafID);
      }
    } else {
      // Non-leaf node:
      int address = nodeID * (bytesPerDim+1);
      int splitDim = splitPackedValues[address] & 0xff;
      assert splitDim < numDims;
      byte[] splitPackedValue = new byte[packedBytesLength];
      // Recurse on left sub-tree:
      System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
      verify(state,
             2*nodeID,
             cellMinPacked, splitPackedValue);
      // Recurse on right sub-tree:
      System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
      verify(state,
             2*nodeID+1,
             splitPackedValue, cellMaxPacked);
    }
  }
  static final class IntersectState {
    final IndexInput in;
    final int[] scratchDocIDs;
    final byte[] scratchPackedValue;
@ -119,6 +277,7 @@ public class BKDReader implements Accountable {
    if (nodeID >= leafNodeOffset) {
      //System.out.println("ADDALL");
      visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor);
      // TODO: we can assert that the first value here in fact matches what the index claimed?
    } else {
      addAll(state, 2*nodeID);
      addAll(state, 2*nodeID+1);
@ -196,38 +355,43 @@ public class BKDReader implements Accountable {
    }
    if (nodeID >= leafNodeOffset) {
-      //System.out.println("FILTER");
+      // TODO: we can assert that the first value here in fact matches what the index claimed?
      // Leaf node; scan and filter all points in this block:
      int count = readDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.scratchDocIDs);
-      // Again, this time reading values and checking with the visitor
+      int leafID = nodeID - leafNodeOffset;
-      visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
+      
      // In the unbalanced case it's possible the left most node only has one child:
      if (leafID < leafBlockFPs.length) {
        // Leaf node; scan and filter all points in this block:
        int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
        // Again, this time reading values and checking with the visitor
        visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
      }
    } else {
      // Non-leaf node: recurse on the split left and right nodes
      // TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here:
      int address = nodeID * (bytesPerDim+1);
      int splitDim = splitPackedValues[address] & 0xff;
      assert splitDim < numDims;
      // TODO: can we alloc & reuse this up front?
      byte[] splitValue = new byte[bytesPerDim];
      System.arraycopy(splitPackedValues, address+1, splitValue, 0, bytesPerDim);
      // TODO: can we alloc & reuse this up front?
      byte[] splitPackedValue = new byte[packedBytesLength];
      // Recurse on left sub-tree:
      System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitValue, 0, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
      intersect(state,
                2*nodeID,
                cellMinPacked, splitPackedValue);
      // Recurse on right sub-tree:
      System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitValue, 0, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
      intersect(state,
                2*nodeID+1,
                splitPackedValue, cellMaxPacked);
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java
@ -69,8 +69,12 @@ public final class BKDUtil {
  /** Returns positive int if a &gt; b, negative int if a &lt; b and 0 if a == b */
  public static int compare(int bytesPerDim, byte[] a, int aIndex, byte[] b, int bIndex) {
    assert aIndex >= 0;
    assert bIndex >= 0;
    int aOffset = aIndex*bytesPerDim;
    int bOffset = bIndex*bytesPerDim;
    for(int i=0;i<bytesPerDim;i++) {
-      int cmp = (a[aIndex*bytesPerDim+i]&0xff) - (b[bIndex*bytesPerDim+i]&0xff);
+      int cmp = (a[aOffset+i]&0xff) - (b[bOffset+i]&0xff);
      if (cmp != 0) {
        return cmp;
      }
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@ -20,10 +20,13 @@ package org.apache.lucene.util.bkd;
 import java.io.Closeable;
 import java.io.EOFException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.List;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
@ -33,11 +36,13 @@ import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.IntroSorter;
 import org.apache.lucene.util.LongBitSet;
 import org.apache.lucene.util.OfflineSorter;
 import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
 import org.apache.lucene.util.OfflineSorter;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
 // TODO
 //   - the compression is somewhat stupid now (delta vInt for 1024 docIDs, no compression for the byte[] values even though they have high locality)
@ -210,16 +215,339 @@ public class BKDWriter implements Closeable {
    pointCount++;
  }
  private static class MergeReader {
    final BKDReader bkd;
    final BKDReader.IntersectState state;
    final MergeState.DocMap docMap;
    /** Base offset for all our docIDs */
    final int docIDBase;
    /** Current doc ID */
    public int docID;
    /** Which doc in this block we are up to */
    private int docBlockUpto;
    /** How many docs in the current block */
    private int docsInBlock;
    /** Which leaf block we are up to */
    private int blockID;
    public MergeReader(BKDReader bkd, MergeState.DocMap docMap, int docIDBase) throws IOException {
      this.bkd = bkd;
      state = new BKDReader.IntersectState(bkd.in.clone(),
                                           bkd.numDims,
                                           bkd.packedBytesLength,
                                           bkd.maxPointsInLeafNode,
                                           null);
      this.docMap = docMap;
      this.docIDBase = docIDBase;
      long minFP = Long.MAX_VALUE;
      //System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length);
      for(long fp : bkd.leafBlockFPs) {
        minFP = Math.min(minFP, fp);
        //System.out.println("  leaf fp=" + fp);
      }
      state.in.seek(minFP);
    }
    public boolean next() throws IOException {
      //System.out.println("MR.next this=" + this);
      while (true) {
        if (docBlockUpto == docsInBlock) {
          if (blockID == bkd.leafBlockFPs.length) {
            //System.out.println("  done!");
            return false;
          }
          //System.out.println("  new block @ fp=" + state.in.getFilePointer());
          docsInBlock = bkd.readDocIDs(state.in, state.in.getFilePointer(), state.scratchDocIDs);
          docBlockUpto = 0;
          for(int dim=0;dim<bkd.numDims;dim++) {
            int prefix = state.in.readVInt();
            state.commonPrefixLengths[dim] = prefix;
            if (prefix > 0) {
              state.in.readBytes(state.scratchPackedValue, dim*bkd.bytesPerDim, prefix);
            }
          }
          blockID++;
        }
        int oldDocID = state.scratchDocIDs[docBlockUpto++];
        int mappedDocID;
        if (docMap == null) {
          mappedDocID = oldDocID;
        } else {
          mappedDocID = docMap.get(oldDocID);
        }
        for(int dim=0;dim<bkd.numDims;dim++) {
          int prefix = state.commonPrefixLengths[dim];
          state.in.readBytes(state.scratchPackedValue, dim*bkd.bytesPerDim + prefix, bkd.bytesPerDim - prefix);
        }
        if (mappedDocID != -1) {
          // Not deleted!
          docID = mappedDocID;
          return true;
        }
      }
    }
  }
  private static class BKDMergeQueue extends PriorityQueue<MergeReader> {
    private final int bytesPerDim;
    public BKDMergeQueue(int bytesPerDim, int maxSize) {
      super(maxSize);
      this.bytesPerDim = bytesPerDim;
    }
    @Override
    public boolean lessThan(MergeReader a, MergeReader b) {
      assert a != b;
      int cmp = StringHelper.compare(bytesPerDim, a.state.scratchPackedValue, 0, b.state.scratchPackedValue, 0);
      if (cmp < 0) {
        return true;
      } else if (cmp > 0) {
        return false;
      }
      // Tie break by sorting smaller docIDs earlier:
      return a.docIDBase < b.docIDBase;
    }
  }
  /** More efficient bulk-add for incoming {@link BKDReader}s.  This does a merge sort of the already
   *  sorted values and currently only works when numDims==1. */
  public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<BKDReader> readers, List<Integer> docIDBases) throws IOException {
    if (numDims != 1) {
      throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
    }
    if (pointCount != 0) {
      throw new IllegalStateException("cannot mix add and merge");
    }
    //System.out.println("BKDW.merge segs=" + readers.size());
    // Catch user silliness:
    if (heapPointWriter == null && tempInput == null) {
      throw new IllegalStateException("already finished");
    }
    // Mark that we already finished:
    heapPointWriter = null;
    assert docMaps == null || readers.size() == docMaps.size();
    BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());
    for(int i=0;i<readers.size();i++) {
      BKDReader bkd = readers.get(i);
      MergeState.DocMap docMap;
      if (docMaps == null) {
        docMap = null;
      } else {
        docMap = docMaps.get(i);
      }
      MergeReader reader = new MergeReader(bkd, docMap, docIDBases.get(i));
      if (reader.next()) {
        queue.add(reader);
      }
    }
    int leafCount = 0;
    List<Long> leafBlockFPs = new ArrayList<>();
    List<byte[]> leafBlockStartValues = new ArrayList<>();
    // Target halfway between min and max allowed for the leaf:
    int pointsPerLeafBlock = (int) (0.75 * maxPointsInLeafNode);
    //System.out.println("POINTS PER: " + pointsPerLeafBlock);
    byte[] lastPackedValue = new byte[bytesPerDim];
    byte[] firstPackedValue = new byte[bytesPerDim];
    long valueCount = 0;
    // Buffer up each leaf block's docs and values
    int[] leafBlockDocIDs = new int[maxPointsInLeafNode];
    byte[][] leafBlockPackedValues = new byte[maxPointsInLeafNode][];
    for(int i=0;i<maxPointsInLeafNode;i++) {
      leafBlockPackedValues[i] = new byte[packedBytesLength];
    }
    Arrays.fill(commonPrefixLengths, bytesPerDim);
    while (queue.size() != 0) {
      MergeReader reader = queue.top();
      // System.out.println("iter reader=" + reader);
      // NOTE: doesn't work with subclasses (e.g. SimpleText!)
      leafBlockDocIDs[leafCount] = reader.docIDBase + reader.docID;
      System.arraycopy(reader.state.scratchPackedValue, 0, leafBlockPackedValues[leafCount], 0, packedBytesLength);
      assert numDims > 1 || valueInOrder(valueCount++, lastPackedValue, reader.state.scratchPackedValue);
      if (leafCount == 0) {
        if (leafBlockFPs.size() > 0) {
          // Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
          leafBlockStartValues.add(Arrays.copyOf(reader.state.scratchPackedValue, bytesPerDim));
        }
        Arrays.fill(commonPrefixLengths, bytesPerDim);
        System.arraycopy(reader.state.scratchPackedValue, 0, firstPackedValue, 0, bytesPerDim);
      } else {
        // Find per-dim common prefix:
        for(int dim=0;dim<numDims;dim++) {
          int offset = dim * bytesPerDim;
          for(int j=0;j<commonPrefixLengths[dim];j++) {
            if (firstPackedValue[offset+j] != reader.state.scratchPackedValue[offset+j]) {
              commonPrefixLengths[dim] = j;
              break;
            }
          }
        }
      }
      leafCount++;
      if (reader.next()) {
        queue.updateTop();
      } else {
        // This segment was exhausted
        queue.pop();
      }
      // We write a block once we hit exactly the max count ... this is different from
      // when we flush a new segment, where we write between max/2 and max per leaf block,
      // so merged segments will behave differently from newly flushed segments:
      if (leafCount == pointsPerLeafBlock || queue.size() == 0) {
        leafBlockFPs.add(out.getFilePointer());
        checkMaxLeafNodeCount(leafBlockFPs.size());
        writeLeafBlockDocs(out, leafBlockDocIDs, 0, leafCount);
        writeCommonPrefixes(out, commonPrefixLengths, firstPackedValue);
        // Write the full values:
        for (int i=0;i<leafCount;i++) {
          writeLeafBlockPackedValue(out, commonPrefixLengths, leafBlockPackedValues[i]);
        }
        leafCount = 0;
      }
    }
    long indexFP = out.getFilePointer();
    int numInnerNodes = leafBlockStartValues.size();
    //System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts=" + leafBlockStartValues.size());
    byte[] index = new byte[(1+numInnerNodes) * (1+bytesPerDim)];
    rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
    long[] arr = new long[leafBlockFPs.size()];
    for(int i=0;i<leafBlockFPs.size();i++) {
      arr[i] = leafBlockFPs.get(i);
    }
    writeIndex(out, arr, index);
    return indexFP;
  }
  // TODO: there must be a simpler way?
  private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
    //System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length);
    if (count == 1) {
      // Leaf index node
      //System.out.println("  leaf index node");
      //System.out.println("  index[" + nodeID + "] = blockStartValues[" + offset + "]");
      System.arraycopy(leafBlockStartValues.get(offset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim);
    } else if (count > 1) {
      // Internal index node: binary partition of count
      int countAtLevel = 1;
      int totalCount = 0;
      while (true) {
        int countLeft = count - totalCount;
        //System.out.println("    cycle countLeft=" + countLeft + " coutAtLevel=" + countAtLevel);
        if (countLeft <= countAtLevel) {
          // This is the last level, possibly partially filled:
          int lastLeftCount = Math.min(countAtLevel/2, countLeft);
          assert lastLeftCount >= 0;
          int leftHalf = (totalCount-1)/2 + lastLeftCount;
          int rootOffset = offset + leftHalf;
          /*
          System.out.println("  last left count " + lastLeftCount);
          System.out.println("  leftHalf " + leftHalf + " rightHalf=" + (count-leftHalf-1));
          System.out.println("  rootOffset=" + rootOffset);
          */
          System.arraycopy(leafBlockStartValues.get(rootOffset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim);
          //System.out.println("  index[" + nodeID + "] = blockStartValues[" + rootOffset + "]");
          // TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree
          // under here, to save this while loop on each recursion
          // Recurse left
          rotateToTree(2*nodeID, offset, leftHalf, index, leafBlockStartValues);
          // Recurse right
          rotateToTree(2*nodeID+1, rootOffset+1, count-leftHalf-1, index, leafBlockStartValues);
          return;
        }
        totalCount += countAtLevel;
        countAtLevel *= 2;
      }
    } else {
      assert count == 0;
    }
  }
  // TODO: if we fixed each partition step to just record the file offset at the "split point", we could probably handle variable length
  // encoding and not have our own ByteSequencesReader/Writer
-  /** If dim=-1 we sort by docID, else by that dim. */
+  /** Sort the heap writer by the specified dim */
  private void sortHeapPointWriter(final HeapPointWriter writer, int start, int length, int dim) {
    assert pointCount < Integer.MAX_VALUE;
    //int[] swapCount = new int[1];
    //int[] cmpCount = new int[1];
    //System.out.println("SORT length=" + length);
    // All buffered points are still in heap; just do in-place sort:
-    new InPlaceMergeSorter() {
+    new IntroSorter() {
      private final byte[] pivotPackedValue = new byte[bytesPerDim];
      private int pivotDocID;
      private long pivotOrd;
      @Override
      protected void setPivot(int i) {
        pivotDocID = writer.docIDs[i];
        pivotOrd = writer.ords[i];
        int block = i / writer.valuesPerBlock;
        int index = i % writer.valuesPerBlock;
        System.arraycopy(writer.blocks.get(block), index*packedBytesLength+dim*bytesPerDim, pivotPackedValue, 0, bytesPerDim);
      }
      @Override
      protected int comparePivot(int j) {
        //cmpCount[0]++;
        int block = j / writer.valuesPerBlock;
        int index = j % writer.valuesPerBlock;
        assert index >= 0: "index=" + index + " j=" + j;
        int cmp = BKDUtil.compare(bytesPerDim, pivotPackedValue, 0, writer.blocks.get(block), index*numDims+dim);
        if (cmp != 0) {
          return cmp;
        }
        // Tie-break
        cmp = Integer.compare(pivotDocID, writer.docIDs[j]);
        if (cmp != 0) {
          return cmp;
        }
        return Long.compare(pivotOrd, writer.ords[j]);
      }
      @Override
      protected void swap(int i, int j) {
        int docID = writer.docIDs[i];
@ -230,21 +558,27 @@ public class BKDWriter implements Closeable {
        writer.ords[i] = writer.ords[j];
        writer.ords[j] = ord;
        byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock);
        int indexI = (i % writer.valuesPerBlock) * packedBytesLength;
        byte[] blockJ = writer.blocks.get(j / writer.valuesPerBlock);
        int indexJ = (j % writer.valuesPerBlock) * packedBytesLength;
        // scratch1 = values[i]
-        writer.readPackedValue(i, scratch1);
+        System.arraycopy(blockI, indexI, scratch1, 0, packedBytesLength);
-        // scratch2 = values[j]
+        // values[i] = values[j]
-        writer.readPackedValue(j, scratch2);
+        System.arraycopy(blockJ, indexJ, blockI, indexI, packedBytesLength);
        // values[i] = scratch2
        writer.writePackedValue(i, scratch2);
        // values[j] = scratch1
-        writer.writePackedValue(j, scratch1);
+        System.arraycopy(scratch1, 0, blockJ, indexJ, packedBytesLength);
      }
      @Override
      protected int compare(int i, int j) {
-        writer.readPackedValue(i, scratch1);
+        //cmpCount[0]++;
-        writer.readPackedValue(j, scratch2);
+        int blockI = i / writer.valuesPerBlock;
-        int cmp = BKDUtil.compare(bytesPerDim, scratch1, dim, scratch2, dim);
+        int dimI = i % writer.valuesPerBlock;
        int blockJ = j / writer.valuesPerBlock;
        int dimJ = j % writer.valuesPerBlock;
        int cmp = BKDUtil.compare(bytesPerDim, writer.blocks.get(blockI), dimI*numDims+dim, writer.blocks.get(blockJ), dimJ*numDims+dim);
        if (cmp != 0) {
          return cmp;
        }
@ -258,6 +592,7 @@ public class BKDWriter implements Closeable {
        return Long.compare(writer.ords[i], writer.ords[j]);
      }
    }.sort(start, start+length);
    //System.out.println("LEN=" + length + " SWAP=" + swapCount[0] + " CMP=" + cmpCount[0]);
  }
  private PointWriter sort(int dim) throws IOException {
@ -278,7 +613,10 @@ public class BKDWriter implements Closeable {
        sorted.copyFrom(heapPointWriter);
      }
      //long t0 = System.nanoTime();
      sortHeapPointWriter(sorted, 0, (int) pointCount, dim);
      //long t1 = System.nanoTime();
      //System.out.println("BKD: sort took " + ((t1-t0)/1000000.0) + " msec");
      sorted.close();
      return sorted;
@ -366,6 +704,12 @@ public class BKDWriter implements Closeable {
    }
  }
  private void checkMaxLeafNodeCount(int numLeaves) {
    if ((1+bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
      throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
    }
  }
  /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
  public long finish(IndexOutput out) throws IOException {
    //System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);
@ -381,7 +725,12 @@ public class BKDWriter implements Closeable {
      offlinePointWriter.close();
    }
-    LongBitSet ordBitSet = new LongBitSet(pointCount);
+    LongBitSet ordBitSet;
    if (numDims > 1) {
      ordBitSet = new LongBitSet(pointCount);
    } else {
      ordBitSet = null;
    }
    long countPerLeaf = pointCount;
    long innerNodeCount = 1;
@ -391,16 +740,9 @@ public class BKDWriter implements Closeable {
      innerNodeCount *= 2;
    }
-    //System.out.println("innerNodeCount=" + innerNodeCount);
+    int numLeaves = (int) innerNodeCount;
-    if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
+    checkMaxLeafNodeCount(numLeaves);
      throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
    }
    innerNodeCount--;
    int numLeaves = (int) (innerNodeCount+1);
    //System.out.println("LEAVES: " + numLeaves);
    // NOTE: we could save the 1+ here, to use a bit less heap at search time, but then we'd need a somewhat costly check at each
    // step of the recursion to recompute the split dim:
@ -548,7 +890,7 @@ public class BKDWriter implements Closeable {
  private byte[] markRightTree(long rightCount, int splitDim, PathSlice source, LongBitSet ordBitSet) throws IOException {
    // Now we mark ords that fall into the right half, so we can partition on all other dims that are not the split dim:
-    assert ordBitSet.cardinality() == 0: "cardinality=" + ordBitSet.cardinality();
+    assert numDims == 1 || ordBitSet.cardinality() == 0: "cardinality=" + ordBitSet.cardinality();
    // Read the split value, then mark all ords in the right tree (larger than the split value):
    try (PointReader reader = source.writer.getReader(source.start + source.count - rightCount)) {
@ -556,17 +898,19 @@ public class BKDWriter implements Closeable {
      assert result;
      System.arraycopy(reader.packedValue(), splitDim*bytesPerDim, scratch1, 0, bytesPerDim);
      if (numDims > 1) {
      ordBitSet.set(reader.ord());
      // Start at 1 because we already did the first value above (so we could keep the split value):
      for(int i=1;i<rightCount;i++) {
        result = reader.next();
        assert result;
        ordBitSet.set(reader.ord());
      }
-      assert rightCount == ordBitSet.cardinality(): "rightCount=" + rightCount + " cardinality=" + ordBitSet.cardinality();
+        // Start at 1 because we already did the first value above (so we could keep the split value):
        for(int i=1;i<rightCount;i++) {
          result = reader.next();
          assert result;
          ordBitSet.set(reader.ord());
        }
        assert rightCount == ordBitSet.cardinality(): "rightCount=" + rightCount + " cardinality=" + ordBitSet.cardinality();
      }
    }
    return scratch1;
@ -643,7 +987,8 @@ public class BKDWriter implements Closeable {
      PathSlice source = slices[0];
      if (source.writer instanceof HeapPointWriter == false) {
-        // Adversarial cases can cause this, e.g. very lopsided data, all equal points
+        // Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
        // offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
        source = switchToHeap(source);
      }
@ -652,17 +997,19 @@ public class BKDWriter implements Closeable {
      // Save the block file pointer:
      leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
      //System.out.println("  write leaf block @ fp=" + out.getFilePointer());
      // Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
      // loading the values:
-      writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), Math.toIntExact(source.count));
+      int count = Math.toIntExact(source.count);
      writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), count);
      // TODO: we should delta compress / only write suffix bytes, like terms dict (the values will all be "close together" since we are at
      // a leaf cell):
      // First pass: find the per-dim common prefix for all values in this block:
      Arrays.fill(commonPrefixLengths, bytesPerDim);
-      for (int i=0;i<source.count;i++) {
+      for (int i=0;i<count;i++) {
        if (i == 0) {
          heapSource.readPackedValue(Math.toIntExact(source.start + i), scratch1);
        } else {
@ -682,9 +1029,11 @@ public class BKDWriter implements Closeable {
      writeCommonPrefixes(out, commonPrefixLengths, scratch1);
      // Second pass: write the full values:
      byte[] lastPackedValue = new byte[bytesPerDim];
      for (int i=0;i<source.count;i++) {
        // TODO: we could do bulk copying here, avoiding the intermediate copy:
        heapSource.readPackedValue(Math.toIntExact(source.start + i), scratchPackedValue);
        assert numDims != 1 || valueInOrder(i, lastPackedValue, scratchPackedValue);
        // Make sure this value does in fact fall within this leaf cell:
        assert valueInBounds(scratchPackedValue, minPackedValue, maxPackedValue);
@ -694,7 +1043,12 @@ public class BKDWriter implements Closeable {
    } else {
      // Inner node: partition/recurse
-      int splitDim = split(minPackedValue, maxPackedValue);
+      int splitDim;
      if (numDims > 1) {
        splitDim = split(minPackedValue, maxPackedValue);
      } else {
        splitDim = 0;
      }
      PathSlice source = slices[splitDim];
@ -758,7 +1112,9 @@ public class BKDWriter implements Closeable {
        }
      }
-      ordBitSet.clear(0, pointCount);
+      if (numDims > 1) {
        ordBitSet.clear(0, pointCount);
      }
      // Recurse on left tree:
      build(2*nodeID, leafNodeOffset, leftSlices,
@ -787,6 +1143,15 @@ public class BKDWriter implements Closeable {
    }
  }
  // only called from assert
  private boolean valueInOrder(long ord, byte[] lastPackedValue, byte[] packedValue) {
    if (ord > 0 && BKDUtil.compare(bytesPerDim, lastPackedValue, 0, packedValue, 0) > 0) {
      throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue) + " ord=" + ord);
    }
    System.arraycopy(packedValue, 0, lastPackedValue, 0, bytesPerDim);
    return true;
  }
  PointWriter getPointWriter(long count) throws IOException {
    if (count <= maxPointsSortInHeap) {
      int size = Math.toIntExact(count);
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@ -114,7 +114,7 @@ public class TestBKD extends LuceneTestCase {
    try (Directory dir = getDirectory(numDocs)) {
      int numDims = TestUtil.nextInt(random(), 1, 5);
      int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
-      float maxMB = (float) 0.1 + (3*random().nextFloat());
+      float maxMB = (float) 3.0 + (3*random().nextFloat());
      BKDWriter w = new BKDWriter(dir, "tmp", numDims, 4, maxPointsInLeafNode, maxMB);
      if (VERBOSE) {
@ -238,7 +238,7 @@ public class TestBKD extends LuceneTestCase {
      int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
      int numDims = TestUtil.nextInt(random(), 1, 5);
      int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
-      float maxMB = (float) 0.1 + (3*random().nextFloat());
+      float maxMB = (float) 3.0 + (3*random().nextFloat());
      BKDWriter w = new BKDWriter(dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
      BigInteger[][] docs = new BigInteger[numDocs][];
@ -425,6 +425,7 @@ public class TestBKD extends LuceneTestCase {
  private void doTestRandomBinary(int count) throws Exception {
    int numDocs = TestUtil.nextInt(random(), count, count*2);
    int numBytesPerDim = TestUtil.nextInt(random(), 2, 30);
    int numDims = TestUtil.nextInt(random(), 1, 5);
    byte[][][] docValues = new byte[numDocs][][];
@ -601,17 +602,9 @@ public class TestBKD extends LuceneTestCase {
  /** docIDs can be null, for the single valued case, else it maps value to docID */
  private void verify(byte[][][] docValues, int[] docIDs, int numDims, int numBytesPerDim) throws Exception {
    try (Directory dir = getDirectory(docValues.length)) {
-      while (true) {
+      int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
-        int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
+      double maxMB = (float) 3.0 + (3*random().nextDouble());
-        double maxMB = (float) 0.1 + (3*random().nextDouble());
+      verify(dir, docValues, docIDs, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
        try {
          verify(dir, docValues, docIDs, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
          return;
        } catch (IllegalArgumentException iae) {
          // This just means we got a too-small maxMB for the maxPointsInLeafNode; just retry
          assertTrue(iae.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
        }
      }
    }
  }
@ -620,10 +613,32 @@ public class TestBKD extends LuceneTestCase {
    if (VERBOSE) {
      System.out.println("TEST: numValues=" + numValues + " numDims=" + numDims + " numBytesPerDim=" + numBytesPerDim + " maxPointsInLeafNode=" + maxPointsInLeafNode + " maxMB=" + maxMB);
    }
-    long indexFP;
+
-    try (BKDWriter w = new BKDWriter(dir, "tmp", numDims, numBytesPerDim, maxPointsInLeafNode, maxMB)) {
+    List<Long> toMerge = null;
    List<Integer> docIDBases = null;
    int seg = 0;
    BKDWriter w = new BKDWriter(dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
    IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
    IndexInput in = null;
    boolean success = false;
    try {
      byte[] scratch = new byte[numBytesPerDim*numDims];
      int lastDocIDBase = 0;
      boolean useMerge = numDims == 1 && numValues >= 10 && random().nextBoolean();
      int valuesInThisSeg;
      if (useMerge) {
        // Sometimes we will call merge with a single segment:
        valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues);
      } else {
        valuesInThisSeg = 0;
      }
      int segCount = 0;
      for(int ord=0;ord<numValues;ord++) {
        int docID;
        if (docIDs == null) {
@ -632,7 +647,7 @@ public class TestBKD extends LuceneTestCase {
          docID = docIDs[ord];
        }
        if (VERBOSE) {
-          System.out.println("  ord=" + ord + " docID=" + docID);
+          System.out.println("  ord=" + ord + " docID=" + docID + " lastDocIDBase=" + lastDocIDBase);
        }
        for(int dim=0;dim<numDims;dim++) {
          if (VERBOSE) {
@ -640,21 +655,56 @@ public class TestBKD extends LuceneTestCase {
          }
          System.arraycopy(docValues[ord][dim], 0, scratch, dim*numBytesPerDim, numBytesPerDim);
        }
-        w.add(scratch, docID);
+        w.add(scratch, docID-lastDocIDBase);
      }
-      boolean success = false;
+        segCount++;
-      try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+
-        indexFP = w.finish(out);
+        if (useMerge && segCount == valuesInThisSeg) {
-        success = true;
+          if (toMerge == null) {
-      } finally {
+            toMerge = new ArrayList<>();
-        if (success == false) {
+            docIDBases = new ArrayList<>();
-          IOUtils.deleteFilesIgnoringExceptions(dir, "bkd");
+          }
          docIDBases.add(lastDocIDBase);
          toMerge.add(w.finish(out));
          valuesInThisSeg = TestUtil.nextInt(random(), numValues/10, numValues/2);
          segCount = 0;
          seg++;
          maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
          maxMB = (float) 3.0 + (3*random().nextDouble());
          w = new BKDWriter(dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
          lastDocIDBase = docID;
        }
      }
    }
-    try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
+      long indexFP;
      if (toMerge != null) {
        System.out.println("merge " + toMerge.size());
        if (segCount > 0) {
          docIDBases.add(lastDocIDBase);
          toMerge.add(w.finish(out));
        }
        out.close();
        in = dir.openInput("bkd", IOContext.DEFAULT);
        seg++;
        w = new BKDWriter(dir, "_" + seg, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB);
        List<BKDReader> readers = new ArrayList<>();
        for(long fp : toMerge) {
          in.seek(fp);
          readers.add(new BKDReader(in));
        }
        out = dir.createOutput("bkd2", IOContext.DEFAULT);
        indexFP = w.merge(out, null, readers, docIDBases);
        out.close();
        in.close();
        in = dir.openInput("bkd2", IOContext.DEFAULT);
      } else {
        indexFP = w.finish(out);
        out.close();
        in = dir.openInput("bkd", IOContext.DEFAULT);
      }
      in.seek(indexFP);
      BKDReader r = new BKDReader(in);
@ -751,8 +801,17 @@ public class TestBKD extends LuceneTestCase {
          assertEquals("docID=" + docID, expected.get(docID), hits.get(docID));
        }
      }
-    } finally {
+      in.close();
      dir.deleteFile("bkd");
      if (toMerge != null) {
        dir.deleteFile("bkd2");
      }
      success = true;
    } finally {
      if (success == false) {
        IOUtils.closeWhileHandlingException(w, in, out);
        IOUtils.deleteFilesIgnoringExceptions(dir, "bkd", "bkd2");
      }
    }
  }