diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e277ddc11d7..33adbb7534e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -53,6 +53,9 @@ Improvements Optimizations +* LUCENE-7501: BKDReader should not store the split dimension explicitly in the + 1D case. (Adrien Grand) + Other * LUCENE-7452: Block join query exception suggests how to find a doc, which diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java index 1477f17fa0b..2a01b526914 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java @@ -138,15 +138,26 @@ class SimpleTextPointsReader extends PointsReader { readLine(dataIn); count = parseInt(SPLIT_COUNT); - byte[] splitPackedValues = new byte[count * (1 + bytesPerDim)]; + byte[] splitPackedValues; + int bytesPerIndexEntry; + if (numDims == 1) { + bytesPerIndexEntry = bytesPerDim; + } else { + bytesPerIndexEntry = 1 + bytesPerDim; + } + splitPackedValues = new byte[count * bytesPerIndexEntry]; for(int i=0;i= BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D ? bytesPerDim : bytesPerDim + 1; packedBytesLength = numDims * bytesPerDim; // Read index: @@ -70,7 +73,7 @@ public class BKDReader implements Accountable { pointCount = in.readVLong(); docCount = in.readVInt(); - splitPackedValues = new byte[(1+bytesPerDim)*numLeaves]; + splitPackedValues = new byte[bytesPerIndexEntry*numLeaves]; // TODO: don't write split packed values[0]! in.readBytes(splitPackedValues, 0, splitPackedValues.length); @@ -135,6 +138,7 @@ public class BKDReader implements Accountable { this.numDims = numDims; this.maxPointsInLeafNode = maxPointsInLeafNode; this.bytesPerDim = bytesPerDim; + bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1; packedBytesLength = numDims * bytesPerDim; this.leafNodeOffset = leafBlockFPs.length; this.leafBlockFPs = leafBlockFPs; @@ -234,22 +238,22 @@ public class BKDReader implements Accountable { } else { // Non-leaf node: - int address = nodeID * (bytesPerDim+1); - int splitDim = splitPackedValues[address] & 0xff; + int address = nodeID * bytesPerIndexEntry; + int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff; assert splitDim < numDims; byte[] splitPackedValue = new byte[packedBytesLength]; // Recurse on left sub-tree: System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength); - System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); + System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); verify(state, 2*nodeID, cellMinPacked, splitPackedValue); // Recurse on right sub-tree: System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength); - System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); + System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); verify(state, 2*nodeID+1, splitPackedValue, cellMaxPacked); @@ -457,8 +461,8 @@ public class BKDReader implements Accountable { // Non-leaf node: recurse on the split left and right nodes // TODO: save the unused 1 byte prefix (it's always 0) in the 1d case here: - int address = nodeID * (bytesPerDim+1); - int splitDim = splitPackedValues[address] & 0xff; + int address = nodeID * bytesPerIndexEntry; + int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff; assert splitDim < numDims; // TODO: can we alloc & reuse this up front? @@ -468,14 +472,14 @@ public class BKDReader implements Accountable { // Recurse on left sub-tree: System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength); - System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); + System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); intersect(state, 2*nodeID, cellMinPacked, splitPackedValue); // Recurse on right sub-tree: System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength); - System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); + System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); intersect(state, 2*nodeID+1, splitPackedValue, cellMaxPacked); @@ -484,16 +488,16 @@ public class BKDReader implements Accountable { /** Copies the split value for this node into the provided byte array */ public void copySplitValue(int nodeID, byte[] splitPackedValue) { - int address = nodeID * (bytesPerDim+1); - int splitDim = splitPackedValues[address] & 0xff; + int address = nodeID * bytesPerIndexEntry; + int splitDim = numDims == 1 ? 0 : splitPackedValues[address++] & 0xff; assert splitDim < numDims; - System.arraycopy(splitPackedValues, address+1, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); + System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim); } @Override public long ramBytesUsed() { - return splitPackedValues.length + - leafBlockFPs.length * Long.BYTES; + return RamUsageEstimator.sizeOf(splitPackedValues) + + RamUsageEstimator.sizeOf(leafBlockFPs); } public byte[] getMinPackedValue() { diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 88a84e9efe9..da732860a25 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -82,7 +82,8 @@ public class BKDWriter implements Closeable { public static final int VERSION_START = 0; public static final int VERSION_COMPRESSED_DOC_IDS = 1; public static final int VERSION_COMPRESSED_VALUES = 2; - public static final int VERSION_CURRENT = VERSION_COMPRESSED_VALUES; + public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3; + public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D; /** How many bytes each docs takes in the fixed-width offline format */ private final int bytesPerDoc; @@ -1033,10 +1034,15 @@ public class BKDWriter implements Closeable { out.writeVLong(pointCount); out.writeVInt(docsSeen.cardinality()); - // TODO: for 1D case, don't waste the first byte of each split value (it's always 0) - // NOTE: splitPackedValues[0] is unused, because nodeID is 1-based: - out.writeBytes(splitPackedValues, 0, splitPackedValues.length); + if (numDims == 1) { + // write the index, skipping the byte used to store the split dim since it is always 0 + for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) { + out.writeBytes(splitPackedValues, i, bytesPerDim); + } + } else { + out.writeBytes(splitPackedValues, 0, splitPackedValues.length); + } long lastFP = 0; for (int i=0;i