Reduce the heap use of BKDReader instances (#13464)

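Each `BKDReader` keeps a `packedIndex` slice of `indexIn` around solely so
that it can be cloned later, and these slices consume a lot of heap. If
`indexIn` is of type `MemorySegmentIndexInput`, the overhead of retaining
that many slices is far higher than the extra 12 bytes per reader (one
`long` plus one `int`) that this change adds; the slice description alone
often costs a lot. The slice is now created on demand instead.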
In a number of Elasticsearch example use cases with high segment counts that
I investigated, this change would save on the order of gigabytes of heap.
This commit is contained in:
Armin Braun 2024-06-07 13:27:10 +02:00 committed by GitHub
parent 9f8e886702
commit c7a7d48d65
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 6 additions and 5 deletions

View File

@ -43,7 +43,9 @@ public class BKDReader extends PointValues {
final int version;
final long minLeafBlockFP;
final IndexInput packedIndex;
private final long indexStartPointer;
private final int numIndexBytes;
private final IndexInput indexIn;
// if true, the tree is a legacy balanced tree
private final boolean isTreeBalanced;
@ -95,8 +97,7 @@ public class BKDReader extends PointValues {
pointCount = metaIn.readVLong();
docCount = metaIn.readVInt();
int numIndexBytes = metaIn.readVInt();
long indexStartPointer;
numIndexBytes = metaIn.readVInt();
if (version >= BKDWriter.VERSION_META_FILE) {
minLeafBlockFP = metaIn.readLong();
indexStartPointer = metaIn.readLong();
@ -105,7 +106,7 @@ public class BKDReader extends PointValues {
minLeafBlockFP = indexIn.readVLong();
indexIn.seek(indexStartPointer);
}
this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
this.indexIn = indexIn;
this.in = dataIn;
// for only one leaf, balanced and unbalanced trees can be handled the same way
// we set it to unbalanced.
@ -158,7 +159,7 @@ public class BKDReader extends PointValues {
@Override
public PointTree getPointTree() throws IOException {
return new BKDPointTree(
packedIndex.clone(),
indexIn.slice("packedIndex", indexStartPointer, numIndexBytes),
this.in.clone(),
config,
numLeaves,