LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more (#1558)

This commit is contained in:
Ignacio Vera 2020-06-10 08:13:12 +02:00 committed by GitHub
parent 2660139aa3
commit 37a83675a7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 43 additions and 214 deletions

View File

@ -292,6 +292,8 @@ Other
* LUCENE-9232: Fix or suppress 13 resource leak precommit warnings in lucene/replicator (Andras Salamon via Erick Erickson)
* LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more. (Ignacio Vera)
Build
* Upgrade forbiddenapis to version 3.0.1. (Uwe Schindler)

View File

@ -19,11 +19,7 @@ package org.apache.lucene.codecs.lucene60;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
@ -34,8 +30,6 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
@ -133,21 +127,7 @@ public class Lucene60PointsReader extends PointsReader implements Closeable {
@Override
public long ramBytesUsed() {
long sizeInBytes = 0;
for(BKDReader reader : readers.values()) {
sizeInBytes += reader.ramBytesUsed();
}
return sizeInBytes;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
for(Map.Entry<Integer,BKDReader> ent : readers.entrySet()) {
resources.add(Accountables.namedAccountable(readState.fieldInfos.fieldInfo(ent.getKey()).name,
ent.getValue()));
}
return Collections.unmodifiableList(resources);
return 0L;
}
@Override

View File

@ -19,11 +19,7 @@ package org.apache.lucene.codecs.lucene86;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
@ -35,8 +31,6 @@ import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDReader;
@ -133,21 +127,7 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
@Override
public long ramBytesUsed() {
long sizeInBytes = 0;
for(BKDReader reader : readers.values()) {
sizeInBytes += reader.ramBytesUsed();
}
return sizeInBytes;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
for(Map.Entry<Integer,BKDReader> ent : readers.entrySet()) {
resources.add(Accountables.namedAccountable(readState.fieldInfos.fieldInfo(ent.getKey()).name,
ent.getValue()));
}
return Collections.unmodifiableList(resources);
return 0L;
}
@Override

View File

@ -24,11 +24,7 @@ import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteBufferIndexInput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.MathUtil;
@ -36,103 +32,7 @@ import org.apache.lucene.util.MathUtil;
*
* @lucene.experimental */
public final class BKDReader extends PointValues implements Accountable {
/**
 * A positionable {@link DataInput} over the packed BKD index bytes.
 *
 * <p>Besides the plain {@link DataInput} read operations, implementations report their
 * current position, can be repositioned, and report how many bytes they account for.
 */
private static abstract class BKDInput extends DataInput implements Cloneable {

  /** Returns the current read position. */
  abstract int getPosition();

  /**
   * Moves the read position to {@code pos}.
   *
   * @throws IOException if the implementation fails to reposition
   */
  abstract void setPosition(int pos) throws IOException;

  /** Returns the number of bytes this input reports as used. */
  abstract long ramBytesUsed();

  /** Narrows the return type of the inherited {@code clone()} to {@link BKDInput}. */
  @Override
  public BKDInput clone() {
    final Object copy = super.clone();
    return (BKDInput) copy;
  }
}
/**
 * {@link BKDInput} that reads the packed index directly from an {@link IndexInput}
 * and therefore reports zero bytes from {@link #ramBytesUsed()}.
 */
private static class BKDOffHeapInput extends BKDInput implements Cloneable {

  /** The input every read and seek is delegated to. */
  private final IndexInput packedIndex;

  /**
   * Wraps {@code packedIndex}; the input is used as-is, it is not copied or cloned here.
   */
  BKDOffHeapInput(IndexInput packedIndex) {
    this.packedIndex = packedIndex;
  }

  /** Returns a new wrapper around a clone of the underlying {@link IndexInput}. */
  @Override
  public BKDOffHeapInput clone() {
    return new BKDOffHeapInput(packedIndex.clone());
  }

  /** Always 0: this implementation holds no copy of the index bytes. */
  @Override
  long ramBytesUsed() {
    return 0;
  }

  /**
   * Returns the current file pointer of the underlying input as an {@code int}.
   *
   * @throws ArithmeticException if the file pointer does not fit in an {@code int}
   */
  @Override
  int getPosition() {
    // Fail loudly instead of silently wrapping: a truncated position would make a later
    // setPosition() seek to an unrelated offset of the index.
    return Math.toIntExact(packedIndex.getFilePointer());
  }

  /** Seeks the underlying input to {@code pos}. */
  @Override
  void setPosition(int pos) throws IOException {
    packedIndex.seek(pos);
  }

  /** Reads one byte from the underlying input. */
  @Override
  public byte readByte() throws IOException {
    return packedIndex.readByte();
  }

  /** Reads {@code len} bytes from the underlying input into {@code b} starting at {@code offset}. */
  @Override
  public void readBytes(byte[] b, int offset, int len) throws IOException {
    packedIndex.readBytes(b, offset, len);
  }
}
/**
 * {@link BKDInput} that copies the packed index into a {@code byte[]} up front and
 * serves all subsequent reads from a {@link ByteArrayDataInput} over that array.
 */
private static class BKDOnHeapInput extends BKDInput implements Cloneable {

  /** Reader over the in-memory copy of the packed index. */
  private final ByteArrayDataInput packedIndex;

  /**
   * Reads {@code numBytes} bytes from {@code packedIndex} into a freshly allocated array.
   *
   * @throws IOException if reading from {@code packedIndex} fails
   */
  BKDOnHeapInput(IndexInput packedIndex, int numBytes) throws IOException {
    final byte[] heapCopy = new byte[numBytes];
    packedIndex.readBytes(heapCopy, 0, heapCopy.length);
    this.packedIndex = new ByteArrayDataInput(heapCopy);
  }

  /** Wraps an already populated reader; used by {@link #clone()}. */
  private BKDOnHeapInput(ByteArrayDataInput source) {
    this.packedIndex = source;
  }

  /** Returns a new wrapper around a clone of the underlying {@link ByteArrayDataInput}. */
  @Override
  public BKDOnHeapInput clone() {
    final ByteArrayDataInput copy = (ByteArrayDataInput) packedIndex.clone();
    return new BKDOnHeapInput(copy);
  }

  /** Returns the length reported by the underlying {@link ByteArrayDataInput}. */
  @Override
  long ramBytesUsed() {
    final long numBytes = packedIndex.length();
    return numBytes;
  }

  /** Returns the current position of the underlying reader. */
  @Override
  int getPosition() {
    return packedIndex.getPosition();
  }

  /** Moves the underlying reader to {@code pos}. */
  @Override
  void setPosition(int pos) {
    packedIndex.setPosition(pos);
  }

  /** Reads one byte from the in-memory copy. */
  @Override
  public byte readByte() throws IOException {
    return packedIndex.readByte();
  }

  /** Reads {@code len} bytes from the in-memory copy into {@code b} starting at {@code offset}. */
  @Override
  public void readBytes(byte[] b, int offset, int len) throws IOException {
    packedIndex.readBytes(b, offset, len);
  }
}
public final class BKDReader extends PointValues {
// Packed array of byte[] holding all split values in the full binary tree:
final int leafNodeOffset;
@ -151,18 +51,11 @@ public final class BKDReader extends PointValues implements Accountable {
protected final int packedIndexBytesLength;
final long minLeafBlockFP;
final BKDInput packedIndex;
final IndexInput packedIndex;
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
/** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned.
* BKD tree is always stored off-heap. */
public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throws IOException {
this(metaIn, indexIn, dataIn, indexIn instanceof ByteBufferIndexInput);
}
/**
* Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned
* and specify {@code true} to store BKD off-heap ({@code false} otherwise)
*/
public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn, boolean offHeap) throws IOException {
version = CodecUtil.checkHeader(metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
numDataDims = metaIn.readVInt();
if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
@ -205,13 +98,7 @@ public final class BKDReader extends PointValues implements Accountable {
minLeafBlockFP = indexIn.readVLong();
indexIn.seek(indexStartPointer);
}
IndexInput slice = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
if (offHeap) {
packedIndex = new BKDOffHeapInput(slice);
} else {
packedIndex = new BKDOnHeapInput(slice, numIndexBytes);
}
this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
this.in = dataIn;
}
@ -219,7 +106,7 @@ public final class BKDReader extends PointValues implements Accountable {
return minLeafBlockFP;
}
/** Used to walk the in-heap index. The format takes advantage of the limited
/** Used to walk the off-heap index. The format takes advantage of the limited
* access pattern to the BKD tree at search time, i.e. starting at the root
* node and recursing downwards one child at a time.
* @lucene.internal */
@ -229,13 +116,11 @@ public final class BKDReader extends PointValues implements Accountable {
private int level;
private int splitDim;
private final byte[][] splitPackedValueStack;
// used to read the packed byte[]
private final BKDInput in;
// used to read the packed tree off-heap
private final IndexInput in;
// holds the minimum (left most) leaf block file pointer for each level we've recursed to:
private final long[] leafBlockFPStack;
// holds the address, in the packed byte[] index, of the left-node of each level:
private final int[] leftNodePositions;
// holds the address, in the packed byte[] index, of the right-node of each level:
// holds the address, in the off-heap index, of the right-node of each level:
private final int[] rightNodePositions;
// holds the splitDim for each level:
private final int[] splitDims;
@ -249,52 +134,41 @@ public final class BKDReader extends PointValues implements Accountable {
private final BytesRef scratch;
IndexTree() {
this(packedIndex.clone(), 1, 1);
// read root node
readNodeData(false);
}
private IndexTree(IndexInput in, int nodeID, int level) {
int treeDepth = getTreeDepth();
splitPackedValueStack = new byte[treeDepth+1][];
nodeID = 1;
level = 1;
this.nodeID = nodeID;
this.level = level;
splitPackedValueStack[level] = new byte[packedIndexBytesLength];
leafBlockFPStack = new long[treeDepth+1];
leftNodePositions = new int[treeDepth+1];
rightNodePositions = new int[treeDepth+1];
splitValuesStack = new byte[treeDepth+1][];
splitDims = new int[treeDepth+1];
negativeDeltas = new boolean[numIndexDims*(treeDepth+1)];
in = packedIndex.clone();
this.in = in;
splitValuesStack[0] = new byte[packedIndexBytesLength];
readNodeData(false);
scratch = new BytesRef();
scratch.length = bytesPerDim;
}
public void pushLeft() {
int nodePosition = leftNodePositions[level];
nodeID *= 2;
level++;
if (splitPackedValueStack[level] == null) {
splitPackedValueStack[level] = new byte[packedIndexBytesLength];
}
System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims);
assert splitDim != -1;
negativeDeltas[level*numIndexDims+splitDim] = true;
try {
in.setPosition(nodePosition);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
readNodeData(true);
}
/** Clone, but you are not allowed to pop up past the point where the clone happened. */
@Override
public IndexTree clone() {
IndexTree index = new IndexTree();
index.nodeID = nodeID;
index.level = level;
IndexTree index = new IndexTree(in.clone(), nodeID, level);
// copy node data
index.splitDim = splitDim;
index.leafBlockFPStack[level] = leafBlockFPStack[level];
index.leftNodePositions[level] = leftNodePositions[level];
index.rightNodePositions[level] = rightNodePositions[level];
index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
System.arraycopy(negativeDeltas, level*numIndexDims, index.negativeDeltas, level*numIndexDims, numIndexDims);
@ -303,17 +177,12 @@ public final class BKDReader extends PointValues implements Accountable {
}
public void pushRight() {
int nodePosition = rightNodePositions[level];
final int nodePosition = rightNodePositions[level];
assert nodePosition >= in.getFilePointer() : "nodePosition = " + nodePosition + " < currentPosition=" + in.getFilePointer();
nodeID = nodeID * 2 + 1;
level++;
if (splitPackedValueStack[level] == null) {
splitPackedValueStack[level] = new byte[packedIndexBytesLength];
}
System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims);
assert splitDim != -1;
negativeDeltas[level*numIndexDims+splitDim] = false;
try {
in.setPosition(nodePosition);
in.seek(nodePosition);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
@ -401,6 +270,13 @@ public final class BKDReader extends PointValues implements Accountable {
}
private void readNodeData(boolean isLeft) {
if (splitPackedValueStack[level] == null) {
splitPackedValueStack[level] = new byte[packedIndexBytesLength];
}
System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims);
assert splitDim != -1;
negativeDeltas[level*numIndexDims+splitDim] = isLeft;
try {
leafBlockFPStack[level] = leafBlockFPStack[level - 1];
@ -443,9 +319,7 @@ public final class BKDReader extends PointValues implements Accountable {
} else {
leftNumBytes = 0;
}
leftNodePositions[level] = in.getPosition();
rightNodePositions[level] = leftNodePositions[level] + leftNumBytes;
rightNodePositions[level] = Math.toIntExact(in.getFilePointer()) + leftNumBytes;
}
} catch (IOException e) {
throw new UncheckedIOException(e);
@ -869,11 +743,6 @@ public final class BKDReader extends PointValues implements Accountable {
}
}
@Override
public long ramBytesUsed() {
return packedIndex.ramBytesUsed();
}
@Override
public byte[] getMinPackedValue() {
return minPackedValue.clone();

View File

@ -46,8 +46,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
public class TestBKD extends LuceneTestCase {
public void testBasicInts1D() throws Exception {
@ -68,7 +66,7 @@ public class TestBKD extends LuceneTestCase {
try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in, false);//randomBoolean());
BKDReader r = new BKDReader(in, in, in);
// Simple 1D range query:
final int queryMin = 42;
@ -172,7 +170,7 @@ public class TestBKD extends LuceneTestCase {
try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in, randomBoolean());
BKDReader r = new BKDReader(in, in, in);
byte[] minPackedValue = r.getMinPackedValue();
byte[] maxPackedValue = r.getMaxPackedValue();
@ -302,7 +300,7 @@ public class TestBKD extends LuceneTestCase {
try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in, randomBoolean());
BKDReader r = new BKDReader(in, in, in);
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
@ -798,7 +796,7 @@ public class TestBKD extends LuceneTestCase {
List<BKDReader> readers = new ArrayList<>();
for(long fp : toMerge) {
in.seek(fp);
readers.add(new BKDReader(in, in, in, randomBoolean()));
readers.add(new BKDReader(in, in, in));
}
out = dir.createOutput("bkd2", IOContext.DEFAULT);
Runnable finalizer = w.merge(out, out, out, docMaps, readers);
@ -816,7 +814,7 @@ public class TestBKD extends LuceneTestCase {
}
in.seek(indexFP);
BKDReader r = new BKDReader(in, in, in, randomBoolean());
BKDReader r = new BKDReader(in, in, in);
int iters = atLeast(100);
for(int iter=0;iter<iters;iter++) {
@ -1092,7 +1090,7 @@ public class TestBKD extends LuceneTestCase {
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in, in, in, randomBoolean());
BKDReader r = new BKDReader(in, in, in);
r.intersect(new IntersectVisitor() {
int lastDocID = -1;
@ -1210,7 +1208,7 @@ public class TestBKD extends LuceneTestCase {
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in, in, in, randomBoolean());
BKDReader r = new BKDReader(in, in, in);
int[] count = new int[1];
r.intersect(new IntersectVisitor() {
@ -1268,7 +1266,7 @@ public class TestBKD extends LuceneTestCase {
IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
in.seek(fp);
BKDReader r = new BKDReader(in, in, in, randomBoolean());
BKDReader r = new BKDReader(in, in, in);
int[] count = new int[1];
r.intersect(new IntersectVisitor() {