mirror of https://github.com/apache/lucene.git
LUCENE-7501: BKDReader should not store the split dimension explicitly in the 1D case.
This commit is contained in:
parent
df09867ccc
commit
e6d225603a
|
@ -53,6 +53,9 @@ Improvements
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
|
* LUCENE-7501: BKDReader should not store the split dimension explicitly in the
|
||||||
|
1D case. (Adrien Grand)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-7452: Block join query exception suggests how to find a doc, which
|
* LUCENE-7452: Block join query exception suggests how to find a doc, which
|
||||||
|
|
|
@ -138,15 +138,26 @@ class SimpleTextPointsReader extends PointsReader {
|
||||||
readLine(dataIn);
|
readLine(dataIn);
|
||||||
count = parseInt(SPLIT_COUNT);
|
count = parseInt(SPLIT_COUNT);
|
||||||
|
|
||||||
byte[] splitPackedValues = new byte[count * (1 + bytesPerDim)];
|
byte[] splitPackedValues;
|
||||||
|
int bytesPerIndexEntry;
|
||||||
|
if (numDims == 1) {
|
||||||
|
bytesPerIndexEntry = bytesPerDim;
|
||||||
|
} else {
|
||||||
|
bytesPerIndexEntry = 1 + bytesPerDim;
|
||||||
|
}
|
||||||
|
splitPackedValues = new byte[count * bytesPerIndexEntry];
|
||||||
for(int i=0;i<count;i++) {
|
for(int i=0;i<count;i++) {
|
||||||
readLine(dataIn);
|
readLine(dataIn);
|
||||||
splitPackedValues[(1 + bytesPerDim) * i] = (byte) parseInt(SPLIT_DIM);
|
int address = bytesPerIndexEntry * i;
|
||||||
|
int splitDim = parseInt(SPLIT_DIM);
|
||||||
|
if (numDims != 1) {
|
||||||
|
splitPackedValues[address++] = (byte) splitDim;
|
||||||
|
}
|
||||||
readLine(dataIn);
|
readLine(dataIn);
|
||||||
assert startsWith(SPLIT_VALUE);
|
assert startsWith(SPLIT_VALUE);
|
||||||
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(SPLIT_VALUE));
|
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(SPLIT_VALUE));
|
||||||
assert br.length == bytesPerDim;
|
assert br.length == bytesPerDim;
|
||||||
System.arraycopy(br.bytes, br.offset, splitPackedValues, (1 + bytesPerDim) * i + 1, bytesPerDim);
|
System.arraycopy(br.bytes, br.offset, splitPackedValues, address, bytesPerDim);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount, docCount);
|
return new SimpleTextBKDReader(dataIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minValue.bytes, maxValue.bytes, pointCount, docCount);
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.lucene.index.PointValues.Relation;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
import org.apache.lucene.util.Accountable;
|
import org.apache.lucene.util.Accountable;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.apache.lucene.util.RamUsageEstimator;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
|
|
||||||
/** Handles intersection of a multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.
|
/** Handles intersection of a multi-dimensional shape in byte[] space with a block KD-tree previously written with {@link BKDWriter}.
|
||||||
|
@ -39,6 +40,7 @@ public class BKDReader implements Accountable {
|
||||||
final private int leafNodeOffset;
|
final private int leafNodeOffset;
|
||||||
final int numDims;
|
final int numDims;
|
||||||
final int bytesPerDim;
|
final int bytesPerDim;
|
||||||
|
final int bytesPerIndexEntry;
|
||||||
final IndexInput in;
|
final IndexInput in;
|
||||||
final int maxPointsInLeafNode;
|
final int maxPointsInLeafNode;
|
||||||
final byte[] minPackedValue;
|
final byte[] minPackedValue;
|
||||||
|
@ -54,6 +56,7 @@ public class BKDReader implements Accountable {
|
||||||
numDims = in.readVInt();
|
numDims = in.readVInt();
|
||||||
maxPointsInLeafNode = in.readVInt();
|
maxPointsInLeafNode = in.readVInt();
|
||||||
bytesPerDim = in.readVInt();
|
bytesPerDim = in.readVInt();
|
||||||
|
bytesPerIndexEntry = numDims == 1 && version >= BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D ? bytesPerDim : bytesPerDim + 1;
|
||||||
packedBytesLength = numDims * bytesPerDim;
|
packedBytesLength = numDims * bytesPerDim;
|
||||||
|
|
||||||
// Read index:
|
// Read index:
|
||||||
|
@ -70,7 +73,7 @@ public class BKDReader implements Accountable {
|
||||||
pointCount = in.readVLong();
|
pointCount = in.readVLong();
|
||||||
docCount = in.readVInt();
|
docCount = in.readVInt();
|
||||||
|
|
||||||
splitPackedValues = new byte[(1+bytesPerDim)*numLeaves];
|
splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];
|
||||||
|
|
||||||
// TODO: don't write split packed values[0]!
|
// TODO: don't write split packed values[0]!
|
||||||
in.readBytes(splitPackedValues, 0, splitPackedValues.length);
|
in.readBytes(splitPackedValues, 0, splitPackedValues.length);
|
||||||
|
@ -135,6 +138,7 @@ public class BKDReader implements Accountable {
|
||||||
this.numDims = numDims;
|
this.numDims = numDims;
|
||||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||||
this.bytesPerDim = bytesPerDim;
|
this.bytesPerDim = bytesPerDim;
|
||||||
|
bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
|
||||||
packedBytesLength = numDims * bytesPerDim;
|
packedBytesLength = numDims * bytesPerDim;
|
||||||
this.leafNodeOffset = leafBlockFPs.length;
|
this.leafNodeOffset = leafBlockFPs.length;
|
||||||
this.leafBlockFPs = leafBlockFPs;
|
this.leafBlockFPs = leafBlockFPs;
|
||||||
|
@ -234,22 +238,22 @@ public class BKDReader implements Accountable {
|
||||||
} else {
|
} else {
|
||||||
// Non-leaf node:
|
// Non-leaf node:
|
||||||
|
|
||||||
int address = nodeID * (bytesPerDim+1);
|
int address = nodeID * bytesPerIndexEntry;
|
||||||
int splitDim = splitPackedValues[address] & 0xff;
|
int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
|
||||||
assert splitDim < numDims;
|
assert splitDim < numDims;
|
||||||
|
|
||||||
byte[] splitPackedValue = new byte[packedBytesLength];
|
byte[] splitPackedValue = new byte[packedBytesLength];
|
||||||
|
|
||||||
// Recurse on left sub-tree:
|
// Recurse on left sub-tree:
|
||||||
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
|
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
|
||||||
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
||||||
verify(state,
|
verify(state,
|
||||||
2*nodeID,
|
2*nodeID,
|
||||||
cellMinPacked, splitPackedValue);
|
cellMinPacked, splitPackedValue);
|
||||||
|
|
||||||
// Recurse on right sub-tree:
|
// Recurse on right sub-tree:
|
||||||
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
|
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
|
||||||
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
||||||
verify(state,
|
verify(state,
|
||||||
2*nodeID+1,
|
2*nodeID+1,
|
||||||
splitPackedValue, cellMaxPacked);
|
splitPackedValue, cellMaxPacked);
|
||||||
|
@ -457,8 +461,8 @@ public class BKDReader implements Accountable {
|
||||||
// Non-leaf node: recurse on the split left and right nodes
|
// Non-leaf node: recurse on the split left and right nodes
|
||||||
|
|
||||||
// TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here:
|
// TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here:
|
||||||
int address = nodeID * (bytesPerDim+1);
|
int address = nodeID * bytesPerIndexEntry;
|
||||||
int splitDim = splitPackedValues[address] & 0xff;
|
int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
|
||||||
assert splitDim < numDims;
|
assert splitDim < numDims;
|
||||||
|
|
||||||
// TODO: can we alloc & reuse this up front?
|
// TODO: can we alloc & reuse this up front?
|
||||||
|
@ -468,14 +472,14 @@ public class BKDReader implements Accountable {
|
||||||
|
|
||||||
// Recurse on left sub-tree:
|
// Recurse on left sub-tree:
|
||||||
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
|
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
|
||||||
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
||||||
intersect(state,
|
intersect(state,
|
||||||
2*nodeID,
|
2*nodeID,
|
||||||
cellMinPacked, splitPackedValue);
|
cellMinPacked, splitPackedValue);
|
||||||
|
|
||||||
// Recurse on right sub-tree:
|
// Recurse on right sub-tree:
|
||||||
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
|
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
|
||||||
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
||||||
intersect(state,
|
intersect(state,
|
||||||
2*nodeID+1,
|
2*nodeID+1,
|
||||||
splitPackedValue, cellMaxPacked);
|
splitPackedValue, cellMaxPacked);
|
||||||
|
@ -484,16 +488,16 @@ public class BKDReader implements Accountable {
|
||||||
|
|
||||||
/** Copies the split value for this node into the provided byte array */
|
/** Copies the split value for this node into the provided byte array */
|
||||||
public void copySplitValue(int nodeID, byte[] splitPackedValue) {
|
public void copySplitValue(int nodeID, byte[] splitPackedValue) {
|
||||||
int address = nodeID * (bytesPerDim+1);
|
int address = nodeID * bytesPerIndexEntry;
|
||||||
int splitDim = splitPackedValues[address] & 0xff;
|
int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff;
|
||||||
assert splitDim < numDims;
|
assert splitDim < numDims;
|
||||||
System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long ramBytesUsed() {
|
public long ramBytesUsed() {
|
||||||
return splitPackedValues.length +
|
return RamUsageEstimator.sizeOf(splitPackedValues) +
|
||||||
leafBlockFPs.length * Long.BYTES;
|
RamUsageEstimator.sizeOf(leafBlockFPs);
|
||||||
}
|
}
|
||||||
|
|
||||||
public byte[] getMinPackedValue() {
|
public byte[] getMinPackedValue() {
|
||||||
|
|
|
@ -82,7 +82,8 @@ public class BKDWriter implements Closeable {
|
||||||
public static final int VERSION_START = 0;
|
public static final int VERSION_START = 0;
|
||||||
public static final int VERSION_COMPRESSED_DOC_IDS = 1;
|
public static final int VERSION_COMPRESSED_DOC_IDS = 1;
|
||||||
public static final int VERSION_COMPRESSED_VALUES = 2;
|
public static final int VERSION_COMPRESSED_VALUES = 2;
|
||||||
public static final int VERSION_CURRENT = VERSION_COMPRESSED_VALUES;
|
public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
|
||||||
|
public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;
|
||||||
|
|
||||||
/** How many bytes each doc takes in the fixed-width offline format */
|
/** How many bytes each doc takes in the fixed-width offline format */
|
||||||
private final int bytesPerDoc;
|
private final int bytesPerDoc;
|
||||||
|
@ -1033,10 +1034,15 @@ public class BKDWriter implements Closeable {
|
||||||
out.writeVLong(pointCount);
|
out.writeVLong(pointCount);
|
||||||
out.writeVInt(docsSeen.cardinality());
|
out.writeVInt(docsSeen.cardinality());
|
||||||
|
|
||||||
// TODO: for 1D case, don't waste the first byte of each split value (it's always 0)
|
|
||||||
|
|
||||||
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
|
// NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
|
||||||
out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
|
if (numDims == 1) {
|
||||||
|
// write the index, skipping the byte used to store the split dim since it is always 0
|
||||||
|
for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) {
|
||||||
|
out.writeBytes(splitPackedValues, i, bytesPerDim);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
|
||||||
|
}
|
||||||
|
|
||||||
long lastFP = 0;
|
long lastFP = 0;
|
||||||
for (int i=0;i<leafBlockFPs.length;i++) {
|
for (int i=0;i<leafBlockFPs.length;i++) {
|
||||||
|
|
Loading…
Reference in New Issue