Reduce the heap use of BKDReader instances (#13464)

We consume a lot of memory for the `indexIn` slices. If `indexIn` is of type `MemorySegmentIndexInput`, the overhead of keeping loads of slices around just for cloning is far higher than the extra 12 bytes per reader (one `long` plus one `int`) that this change adds (the slice description alone often costs a lot). In a number of Elasticsearch example use cases with high segment counts that I investigated, this change would save heap on the order of gigabytes.
2024-06-07 13:27:10 +02:00 · 2024-06-07 13:27:10 +02:00 · c7a7d48d65
parent 9f8e886702
commit c7a7d48d65
1 changed file with 6 additions and 5 deletions
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@@ -43,7 +43,9 @@ public class BKDReader extends PointValues {
  final int version;
  final long minLeafBlockFP;
-  final IndexInput packedIndex;
+  private final long indexStartPointer;
+  private final int numIndexBytes;
+  private final IndexInput indexIn;
  // if true, the tree is a legacy balanced tree
  private final boolean isTreeBalanced;
@@ -95,8 +97,7 @@ public class BKDReader extends PointValues {
    pointCount = metaIn.readVLong();
    docCount = metaIn.readVInt();
-    int numIndexBytes = metaIn.readVInt();
+    numIndexBytes = metaIn.readVInt();
-    long indexStartPointer;
    if (version >= BKDWriter.VERSION_META_FILE) {
      minLeafBlockFP = metaIn.readLong();
      indexStartPointer = metaIn.readLong();
@@ -105,7 +106,7 @@ public class BKDReader extends PointValues {
      minLeafBlockFP = indexIn.readVLong();
      indexIn.seek(indexStartPointer);
    }
-    this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
+    this.indexIn = indexIn;
    this.in = dataIn;
    // for only one leaf, balanced and unbalanced trees can be handled the same way
    // we set it to unbalanced.
@@ -158,7 +159,7 @@ public class BKDReader extends PointValues {
  @Override
  public PointTree getPointTree() throws IOException {
    return new BKDPointTree(
-        packedIndex.clone(),
+        indexIn.slice("packedIndex", indexStartPointer, numIndexBytes),
        this.in.clone(),
        config,
        numLeaves,