From 37a83675a79be96dd28f1c96c9ffa9fa7129a8b4 Mon Sep 17 00:00:00 2001 From: Ignacio Vera Date: Wed, 10 Jun 2020 08:13:12 +0200 Subject: [PATCH] LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more (#1558) --- lucene/CHANGES.txt | 2 + .../codecs/lucene60/Lucene60PointsReader.java | 22 +- .../codecs/lucene86/Lucene86PointsReader.java | 22 +- .../org/apache/lucene/util/bkd/BKDReader.java | 193 +++--------------- .../org/apache/lucene/util/bkd/TestBKD.java | 18 +- 5 files changed, 43 insertions(+), 214 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8c1947daee3..baa1c335a90 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -292,6 +292,8 @@ Other * LUCENE-9232: Fix or suppress 13 resource leak precommit warnings in lucene/replicator (Andras Salamon via Erick Erickson) +* LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more. (Ignacio Vera) + Build * Upgrade forbiddenapis to version 3.0.1. (Uwe Schindler) diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java index e24a33c7bbc..482c5f21b2f 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java @@ -19,11 +19,7 @@ package org.apache.lucene.codecs.lucene60; import java.io.Closeable; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; import org.apache.lucene.codecs.CodecUtil; @@ -34,8 +30,6 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.bkd.BKDReader; @@ -133,21 +127,7 @@ public class Lucene60PointsReader extends PointsReader implements Closeable { @Override public long ramBytesUsed() { - long sizeInBytes = 0; - for(BKDReader reader : readers.values()) { - sizeInBytes += reader.ramBytesUsed(); - } - return sizeInBytes; - } - - @Override - public Collection getChildResources() { - List resources = new ArrayList<>(); - for(Map.Entry ent : readers.entrySet()) { - resources.add(Accountables.namedAccountable(readState.fieldInfos.fieldInfo(ent.getKey()).name, - ent.getValue())); - } - return Collections.unmodifiableList(resources); + return 0L; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java index 8ba2e4a67a8..9aabc97372a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java @@ -19,11 +19,7 @@ package org.apache.lucene.codecs.lucene86; import java.io.Closeable; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; import java.util.HashMap; -import java.util.List; import java.util.Map; import org.apache.lucene.codecs.CodecUtil; @@ -35,8 +31,6 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.bkd.BKDReader; @@ -133,21 +127,7 @@ public class Lucene86PointsReader extends PointsReader implements Closeable { @Override public long ramBytesUsed() { - long sizeInBytes = 0; - for(BKDReader reader : readers.values()) { - sizeInBytes += reader.ramBytesUsed(); - } - return sizeInBytes; - } - - @Override - public Collection getChildResources() { - List resources = new ArrayList<>(); - for(Map.Entry ent : readers.entrySet()) { - resources.add(Accountables.namedAccountable(readState.fieldInfos.fieldInfo(ent.getKey()).name, - ent.getValue())); - } - return Collections.unmodifiableList(resources); + return 0L; } @Override diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index 1f488381523..1e3702db84c 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -24,11 +24,7 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.PointValues; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.store.ByteArrayDataInput; -import org.apache.lucene.store.ByteBufferIndexInput; -import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.Accountable; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.MathUtil; @@ -36,103 +32,7 @@ import org.apache.lucene.util.MathUtil; * * @lucene.experimental */ -public final class BKDReader extends PointValues implements Accountable { - - private static abstract class BKDInput extends DataInput implements Cloneable { - abstract long ramBytesUsed(); - - abstract int getPosition(); - abstract void setPosition(int pos) throws IOException; - - @Override - public BKDInput clone() { - return (BKDInput)super.clone(); - } - } - - private static class BKDOffHeapInput extends BKDInput implements Cloneable { - - private final IndexInput packedIndex; - - BKDOffHeapInput(IndexInput packedIndex) { - this.packedIndex = packedIndex; - } - - @Override - public BKDOffHeapInput clone() { - return new BKDOffHeapInput(packedIndex.clone()); - } - - @Override - long ramBytesUsed() { - return 0; - } - - @Override - int getPosition() { - return (int)packedIndex.getFilePointer(); - } - - @Override - void setPosition(int pos) throws IOException { - packedIndex.seek(pos); - } - - @Override - public byte readByte() throws IOException { - return packedIndex.readByte(); - } - - @Override - public void readBytes(byte[] b, int offset, int len) throws IOException { - packedIndex.readBytes(b, offset, len); - } - } - - private static class BKDOnHeapInput extends BKDInput implements Cloneable { - - private final ByteArrayDataInput packedIndex; - - BKDOnHeapInput(IndexInput packedIndex, int numBytes) throws IOException { - byte[] packedBytes = new byte[numBytes]; - packedIndex.readBytes(packedBytes, 0, numBytes); - this.packedIndex = new ByteArrayDataInput(packedBytes); - } - - private BKDOnHeapInput(ByteArrayDataInput packedIndex) { - this.packedIndex = packedIndex; - } - - @Override - public BKDOnHeapInput clone() { - return new BKDOnHeapInput((ByteArrayDataInput)packedIndex.clone()); - } - - @Override - long ramBytesUsed() { - return packedIndex.length(); - } - - @Override - int getPosition() { - return packedIndex.getPosition(); - } - - @Override - void setPosition(int pos) { - packedIndex.setPosition(pos); - } - - @Override - public byte readByte() throws IOException { - return packedIndex.readByte(); - } - - @Override - public void readBytes(byte[] b, int offset, int len) throws IOException { - packedIndex.readBytes(b, offset, len); - } - } +public final class BKDReader extends PointValues { // Packed array of byte[] holding all split values in the full binary tree: final int leafNodeOffset; @@ -151,18 +51,11 @@ public final class BKDReader extends PointValues implements Accountable { protected final int packedIndexBytesLength; final long minLeafBlockFP; - final BKDInput packedIndex; + final IndexInput packedIndex; - /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */ + /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned. + * BKD tree is always stored off-heap. */ public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throws IOException { - this(metaIn, indexIn, dataIn, indexIn instanceof ByteBufferIndexInput); - } - - /** - * Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned - * and specify {@code true} to store BKD off-heap ({@code false} otherwise) - */ - public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn, boolean offHeap) throws IOException { version = CodecUtil.checkHeader(metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT); numDataDims = metaIn.readVInt(); if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) { @@ -205,13 +98,7 @@ public final class BKDReader extends PointValues implements Accountable { minLeafBlockFP = indexIn.readVLong(); indexIn.seek(indexStartPointer); } - IndexInput slice = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes); - if (offHeap) { - packedIndex = new BKDOffHeapInput(slice); - } else { - packedIndex = new BKDOnHeapInput(slice, numIndexBytes); - } - + this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes); this.in = dataIn; } @@ -219,7 +106,7 @@ public final class BKDReader extends PointValues implements Accountable { return minLeafBlockFP; } - /** Used to walk the in-heap index. The format takes advantage of the limited + /** Used to walk the off-heap index. The format takes advantage of the limited * access pattern to the BKD tree at search time, i.e. starting at the root * node and recursing downwards one child at a time. * @lucene.internal */ @@ -229,13 +116,11 @@ public final class BKDReader extends PointValues implements Accountable { private int level; private int splitDim; private final byte[][] splitPackedValueStack; - // used to read the packed byte[] - private final BKDInput in; + // used to read the packed tree off-heap + private final IndexInput in; // holds the minimum (left most) leaf block file pointer for each level we've recursed to: private final long[] leafBlockFPStack; - // holds the address, in the packed byte[] index, of the left-node of each level: - private final int[] leftNodePositions; - // holds the address, in the packed byte[] index, of the right-node of each level: + // holds the address, in the off-heap index, of the right-node of each level: private final int[] rightNodePositions; // holds the splitDim for each level: private final int[] splitDims; @@ -249,52 +134,41 @@ public final class BKDReader extends PointValues implements Accountable { private final BytesRef scratch; IndexTree() { + this(packedIndex.clone(), 1, 1); + // read root node + readNodeData(false); + } + + private IndexTree(IndexInput in, int nodeID, int level) { int treeDepth = getTreeDepth(); splitPackedValueStack = new byte[treeDepth+1][]; - nodeID = 1; - level = 1; + this.nodeID = nodeID; + this.level = level; splitPackedValueStack[level] = new byte[packedIndexBytesLength]; leafBlockFPStack = new long[treeDepth+1]; - leftNodePositions = new int[treeDepth+1]; rightNodePositions = new int[treeDepth+1]; splitValuesStack = new byte[treeDepth+1][]; splitDims = new int[treeDepth+1]; negativeDeltas = new boolean[numIndexDims*(treeDepth+1)]; - - in = packedIndex.clone(); + this.in = in; splitValuesStack[0] = new byte[packedIndexBytesLength]; - readNodeData(false); scratch = new BytesRef(); scratch.length = bytesPerDim; } public void pushLeft() { - int nodePosition = leftNodePositions[level]; nodeID *= 2; level++; - if (splitPackedValueStack[level] == null) { - splitPackedValueStack[level] = new byte[packedIndexBytesLength]; - } - System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims); - assert splitDim != -1; - negativeDeltas[level*numIndexDims+splitDim] = true; - try { - in.setPosition(nodePosition); - } catch (IOException e) { - throw new UncheckedIOException(e); - } readNodeData(true); } /** Clone, but you are not allowed to pop up past the point where the clone happened. */ @Override public IndexTree clone() { - IndexTree index = new IndexTree(); - index.nodeID = nodeID; - index.level = level; + IndexTree index = new IndexTree(in.clone(), nodeID, level); + // copy node data index.splitDim = splitDim; index.leafBlockFPStack[level] = leafBlockFPStack[level]; - index.leftNodePositions[level] = leftNodePositions[level]; index.rightNodePositions[level] = rightNodePositions[level]; index.splitValuesStack[index.level] = splitValuesStack[index.level].clone(); System.arraycopy(negativeDeltas, level*numIndexDims, index.negativeDeltas, level*numIndexDims, numIndexDims); @@ -303,17 +177,12 @@ public final class BKDReader extends PointValues implements Accountable { } public void pushRight() { - int nodePosition = rightNodePositions[level]; + final int nodePosition = rightNodePositions[level]; + assert nodePosition >= in.getFilePointer() : "nodePosition = " + nodePosition + " < currentPosition=" + in.getFilePointer(); nodeID = nodeID * 2 + 1; level++; - if (splitPackedValueStack[level] == null) { - splitPackedValueStack[level] = new byte[packedIndexBytesLength]; - } - System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims); - assert splitDim != -1; - negativeDeltas[level*numIndexDims+splitDim] = false; try { - in.setPosition(nodePosition); + in.seek(nodePosition); } catch (IOException e) { throw new UncheckedIOException(e); } @@ -401,6 +270,13 @@ public final class BKDReader extends PointValues implements Accountable { } private void readNodeData(boolean isLeft) { + if (splitPackedValueStack[level] == null) { + splitPackedValueStack[level] = new byte[packedIndexBytesLength]; + } + System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims); + assert splitDim != -1; + negativeDeltas[level*numIndexDims+splitDim] = isLeft; + try { leafBlockFPStack[level] = leafBlockFPStack[level - 1]; @@ -443,9 +319,7 @@ public final class BKDReader extends PointValues implements Accountable { } else { leftNumBytes = 0; } - - leftNodePositions[level] = in.getPosition(); - rightNodePositions[level] = leftNodePositions[level] + leftNumBytes; + rightNodePositions[level] = Math.toIntExact(in.getFilePointer()) + leftNumBytes; } } catch (IOException e) { throw new UncheckedIOException(e); @@ -869,11 +743,6 @@ public final class BKDReader extends PointValues implements Accountable { } } - @Override - public long ramBytesUsed() { - return packedIndex.ramBytesUsed(); - } - @Override public byte[] getMinPackedValue() { return minPackedValue.clone(); diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java index 7bd93a326b5..f9bb9ea56b5 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java @@ -46,8 +46,6 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.TestUtil; -import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean; - public class TestBKD extends LuceneTestCase { public void testBasicInts1D() throws Exception { @@ -68,7 +66,7 @@ public class TestBKD extends LuceneTestCase { try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) { in.seek(indexFP); - BKDReader r = new BKDReader(in, in, in, false);//randomBoolean()); + BKDReader r = new BKDReader(in, in, in); // Simple 1D range query: final int queryMin = 42; @@ -172,7 +170,7 @@ public class TestBKD extends LuceneTestCase { try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) { in.seek(indexFP); - BKDReader r = new BKDReader(in, in, in, randomBoolean()); + BKDReader r = new BKDReader(in, in, in); byte[] minPackedValue = r.getMinPackedValue(); byte[] maxPackedValue = r.getMaxPackedValue(); @@ -302,7 +300,7 @@ public class TestBKD extends LuceneTestCase { try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) { in.seek(indexFP); - BKDReader r = new BKDReader(in, in, in, randomBoolean()); + BKDReader r = new BKDReader(in, in, in); int iters = atLeast(100); for(int iter=0;iter readers = new ArrayList<>(); for(long fp : toMerge) { in.seek(fp); - readers.add(new BKDReader(in, in, in, randomBoolean())); + readers.add(new BKDReader(in, in, in)); } out = dir.createOutput("bkd2", IOContext.DEFAULT); Runnable finalizer = w.merge(out, out, out, docMaps, readers); @@ -816,7 +814,7 @@ public class TestBKD extends LuceneTestCase { } in.seek(indexFP); - BKDReader r = new BKDReader(in, in, in, randomBoolean()); + BKDReader r = new BKDReader(in, in, in); int iters = atLeast(100); for(int iter=0;iter