LUCENE-9292: Refactor BKD point configuration into its own class (#1697)

Ignacio Vera 2020-09-08 09:03:41 +02:00 committed by GitHub
parent f7cbde2ad8
commit 59b17366ff
25 changed files with 1044 additions and 1074 deletions

View File

@ -236,7 +236,9 @@ Documentation
Other
---------------------
(No changes)
* LUCENE-9292: Refactor BKD point configuration into its own class. (Ignacio Vera)
======================= Lucene 8.6.2 =======================

View File

@ -38,6 +38,7 @@ import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
@ -82,7 +83,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
/** Uses the default values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap} (16.0) */
public Lucene60PointsWriter(SegmentWriteState writeState) throws IOException {
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
this(writeState, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
@ -90,13 +91,15 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
PointValues values = reader.getValues(fieldInfo.name);
BKDConfig config = new BKDConfig(fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode);
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
config,
maxMBSortInHeap,
values.size())) {
@ -173,6 +176,11 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
}
}
BKDConfig config = new BKDConfig(fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode);
//System.out.println("MERGE: field=" + fieldInfo.name);
// Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
// already sorted incoming segments, instead of trying to sort all points again as if
@ -180,10 +188,7 @@ public class Lucene60PointsWriter extends PointsWriter implements Closeable {
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
config,
maxMBSortInHeap,
totMaxSize)) {
List<BKDReader> bkdReaders = new ArrayList<>();
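
The net effect of the hunks above, condensed into a hedged sketch (maxDoc, dir, segmentName and totalPointCount are placeholder names, not identifiers from the diff): the per-field geometry that used to be passed to BKDWriter as separate int arguments is now bundled into a BKDConfig built from the FieldInfo.

  BKDConfig config = new BKDConfig(fieldInfo.getPointDimensionCount(),      // data dimensions
                                   fieldInfo.getPointIndexDimensionCount(), // index dimensions
                                   fieldInfo.getPointNumBytes(),            // bytes per dimension
                                   maxPointsInLeafNode);
  try (BKDWriter writer = new BKDWriter(maxDoc, dir, segmentName,
                                        config, maxMBSortInHeap, totalPointCount)) {
    // writeField/merge proceed as before; only the constructor signature changed
  }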

View File

@ -36,7 +36,7 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
import org.apache.lucene.util.bkd.BKDConfig;
/**
* Tests Lucene60PointsFormat
@ -47,7 +47,7 @@ public class TestLucene60PointsFormat extends BasePointsFormatTestCase {
public TestLucene60PointsFormat() {
codec = new Lucene84RWCodec();
maxPointsInLeafNode = BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
maxPointsInLeafNode = BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
}
@Override

View File

@ -38,6 +38,7 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDRadixSelector;
import org.apache.lucene.util.bkd.BKDWriter;
import org.apache.lucene.util.bkd.HeapPointWriter;
@ -86,35 +87,12 @@ final class SimpleTextBKDWriter implements Closeable {
public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;
/** How many bytes each docs takes in the fixed-width offline format */
private final int bytesPerDoc;
/** Default maximum number of point in each leaf block */
public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;
/** Default maximum heap to use, before spilling to (slower) disk */
public static final float DEFAULT_MAX_MB_SORT_IN_HEAP = 16.0f;
/** Maximum number of dimensions (2 * max index dimensions) */
public static final int MAX_DIMS = 16;
/** Maximum number of dimensions */
public static final int MAX_INDEX_DIMS = 8;
/** How many dimensions we are storing at the leaf (data) nodes */
protected final int numDataDims;
/** How many dimensions we are indexing in the internal nodes */
protected final int numIndexDims;
/** How many bytes each value in each dimension takes. */
protected final int bytesPerDim;
/** numDims * bytesPerDim */
protected final int packedBytesLength;
/** numIndexDims * bytesPerDim */
protected final int packedIndexBytesLength;
protected final BKDConfig config;
final BytesRefBuilder scratch = new BytesRefBuilder();
@ -135,7 +113,7 @@ final class SimpleTextBKDWriter implements Closeable {
private boolean finished;
private IndexOutput tempInput;
protected final int maxPointsInLeafNode;
private final int maxPointsSortInHeap;
/** Minimum per-dim values, packed */
@ -152,63 +130,39 @@ final class SimpleTextBKDWriter implements Closeable {
private final int maxDoc;
public SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDataDims, int numIndexDims, int bytesPerDim,
int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) throws IOException {
verifyParams(numDataDims, numIndexDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount);
public SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, BKDConfig config, double maxMBSortInHeap, long totalPointCount) throws IOException {
verifyParams(maxMBSortInHeap, totalPointCount);
this.config = config;
// We use tracking dir to deal with removing files on exception, so each place that
// creates temp files doesn't need crazy try/finally/success logic:
this.tempDir = new TrackingDirectoryWrapper(tempDir);
this.tempFileNamePrefix = tempFileNamePrefix;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.numDataDims = numDataDims;
this.numIndexDims = numIndexDims;
this.bytesPerDim = bytesPerDim;
this.totalPointCount = totalPointCount;
this.maxDoc = maxDoc;
docsSeen = new FixedBitSet(maxDoc);
packedBytesLength = numDataDims * bytesPerDim;
packedIndexBytesLength = numIndexDims * bytesPerDim;
scratchDiff = new byte[bytesPerDim];
scratch1 = new byte[packedBytesLength];
scratch2 = new byte[packedBytesLength];
commonPrefixLengths = new int[numDataDims];
minPackedValue = new byte[packedIndexBytesLength];
maxPackedValue = new byte[packedIndexBytesLength];
scratchDiff = new byte[config.bytesPerDim];
scratch1 = new byte[config.packedBytesLength];
scratch2 = new byte[config.packedBytesLength];
commonPrefixLengths = new int[config.numDims];
// dimensional values (numDims * bytesPerDim) + docID (int)
bytesPerDoc = packedBytesLength + Integer.BYTES;
minPackedValue = new byte[config.packedIndexBytesLength];
maxPackedValue = new byte[config.packedIndexBytesLength];
// Maximum number of points we hold in memory at any time
maxPointsSortInHeap = (int) ((maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDataDims));
maxPointsSortInHeap = (int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc * config.numDims));
// Finally, we must be able to hold at least the leaf node in heap during build:
if (maxPointsSortInHeap < maxPointsInLeafNode) {
throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode");
if (maxPointsSortInHeap < config.maxPointsInLeafNode) {
throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than config.maxPointsInLeafNode=" + config.maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease config.maxPointsInLeafNode");
}
this.maxMBSortInHeap = maxMBSortInHeap;
}
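
For a concrete sense of the heap-budget check above, a worked example under assumed values (2 data dimensions of 4-byte integers and the 16 MB default; none of these numbers are taken from this diff):

  BKDConfig config = new BKDConfig(2, 2, Integer.BYTES, 512); // bytesPerDoc = 2*4 + 4 (docID) = 12
  double maxMBSortInHeap = 16.0;
  int maxPointsSortInHeap = (int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc * config.numDims));
  // 16 MB / (12 * 2) ≈ 699,050 points in heap, far above maxPointsInLeafNode = 512,
  // so the IllegalArgumentException above is not triggered.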
public static void verifyParams(int numDims, int numIndexDims, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) {
// We encode dim in a single byte in the splitPackedValues, but we only expose 4 bits for it now, in case we want to use
// remaining 4 bits for another purpose later
if (numDims < 1 || numDims > MAX_DIMS) {
throw new IllegalArgumentException("numDims must be 1 .. " + MAX_DIMS + " (got: " + numDims + ")");
}
if (numIndexDims < 1 || numIndexDims > MAX_INDEX_DIMS) {
throw new IllegalArgumentException("numIndexDims must be 1 .. " + MAX_INDEX_DIMS + " (got: " + numIndexDims + ")");
}
if (numIndexDims > numDims) {
throw new IllegalArgumentException("numIndexDims cannot exceed numDims (" + numDims + ") (got: " + numIndexDims + ")");
}
if (maxPointsInLeafNode <= 0) {
throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
}
if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
}
public static void verifyParams(double maxMBSortInHeap, long totalPointCount) {
if (maxMBSortInHeap < 0.0) {
throw new IllegalArgumentException("maxMBSortInHeap must be >= 0.0 (got: " + maxMBSortInHeap + ")");
}
@ -218,8 +172,8 @@ final class SimpleTextBKDWriter implements Closeable {
}
public void add(byte[] packedValue, int docID) throws IOException {
if (packedValue.length != packedBytesLength) {
throw new IllegalArgumentException("packedValue should be length=" + packedBytesLength + " (got: " + packedValue.length + ")");
if (packedValue.length != config.packedBytesLength) {
throw new IllegalArgumentException("packedValue should be length=" + config.packedBytesLength + " (got: " + packedValue.length + ")");
}
if (pointCount >= totalPointCount) {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + (pointCount + 1) + " values");
@ -228,21 +182,21 @@ final class SimpleTextBKDWriter implements Closeable {
assert pointWriter == null : "Point writer is already initialized";
//total point count is an estimate, but the final point count must be equal to or lower than that number.
if (totalPointCount > maxPointsSortInHeap) {
pointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, "spill", 0);
pointWriter = new OfflinePointWriter(config, tempDir, tempFileNamePrefix, "spill", 0);
tempInput = ((OfflinePointWriter)pointWriter).out;
} else {
pointWriter = new HeapPointWriter(Math.toIntExact(totalPointCount), packedBytesLength);
pointWriter = new HeapPointWriter(config, Math.toIntExact(totalPointCount));
}
System.arraycopy(packedValue, 0, minPackedValue, 0, packedIndexBytesLength);
System.arraycopy(packedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
System.arraycopy(packedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(packedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
} else {
for(int dim=0;dim<numIndexDims;dim++) {
int offset = dim*bytesPerDim;
if (Arrays.compareUnsigned(packedValue, offset, offset + bytesPerDim, minPackedValue, offset, offset + bytesPerDim) < 0) {
System.arraycopy(packedValue, offset, minPackedValue, offset, bytesPerDim);
for(int dim=0;dim<config.numIndexDims;dim++) {
int offset = dim*config.bytesPerDim;
if (Arrays.compareUnsigned(packedValue, offset, offset + config.bytesPerDim, minPackedValue, offset, offset + config.bytesPerDim) < 0) {
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim);
}
if (Arrays.compareUnsigned(packedValue, offset, offset + bytesPerDim, maxPackedValue, offset, offset + bytesPerDim) > 0) {
System.arraycopy(packedValue, offset, maxPackedValue, offset, bytesPerDim);
if (Arrays.compareUnsigned(packedValue, offset, offset + config.bytesPerDim, maxPackedValue, offset, offset + config.bytesPerDim) > 0) {
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim);
}
}
}
@ -262,7 +216,7 @@ final class SimpleTextBKDWriter implements Closeable {
* disk. This method does not use transient disk in order to reorder points.
*/
public long writeField(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
if (numIndexDims == 1) {
if (config.numIndexDims == 1) {
return writeField1Dim(out, fieldName, reader);
} else {
return writeFieldNDims(out, fieldName, reader);
@ -288,7 +242,7 @@ final class SimpleTextBKDWriter implements Closeable {
long countPerLeaf = pointCount = values.size();
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
while (countPerLeaf > config.maxPointsInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
@ -297,7 +251,7 @@ final class SimpleTextBKDWriter implements Closeable {
checkMaxLeafNodeCount(numLeaves);
final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)];
final byte[] splitPackedValues = new byte[numLeaves * (config.bytesPerDim + 1)];
final long[] leafBlockFPs = new long[numLeaves];
// compute the min/max for this slice
@ -305,13 +259,13 @@ final class SimpleTextBKDWriter implements Closeable {
Arrays.fill(maxPackedValue, (byte) 0);
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
values.getValue(i, scratchBytesRef1);
for(int dim=0;dim<numIndexDims;dim++) {
int offset = dim*bytesPerDim;
if (Arrays.compareUnsigned(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, scratchBytesRef1.offset + offset + bytesPerDim, minPackedValue, offset, offset + bytesPerDim) < 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
for(int dim=0;dim<config.numIndexDims;dim++) {
int offset = dim*config.bytesPerDim;
if (Arrays.compareUnsigned(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, scratchBytesRef1.offset + offset + config.bytesPerDim, minPackedValue, offset, offset + config.bytesPerDim) < 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, config.bytesPerDim);
}
if (Arrays.compareUnsigned(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, scratchBytesRef1.offset + offset + bytesPerDim, maxPackedValue, offset, offset + bytesPerDim) > 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
if (Arrays.compareUnsigned(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, scratchBytesRef1.offset + offset + config.bytesPerDim, maxPackedValue, offset, offset + config.bytesPerDim) > 0) {
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, config.bytesPerDim);
}
}
@ -320,7 +274,7 @@ final class SimpleTextBKDWriter implements Closeable {
build(1, numLeaves, values, 0, Math.toIntExact(pointCount), out,
minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
new int[maxPointsInLeafNode]);
new int[config.maxPointsInLeafNode]);
long indexFP = out.getFilePointer();
writeIndex(out, leafBlockFPs, splitPackedValues);
@ -331,7 +285,7 @@ final class SimpleTextBKDWriter implements Closeable {
/* In the 1D case, we can simply sort points in ascending order and use the
* same writing logic as we use at merge time. */
private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
MutablePointsReaderUtils.sort(maxDoc, packedIndexBytesLength, reader, 0, Math.toIntExact(reader.size()));
MutablePointsReaderUtils.sort(config, maxDoc, reader, 0, Math.toIntExact(reader.size()));
final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
@ -361,14 +315,14 @@ final class SimpleTextBKDWriter implements Closeable {
final IndexOutput out;
final List<Long> leafBlockFPs = new ArrayList<>();
final List<byte[]> leafBlockStartValues = new ArrayList<>();
final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
final int[] leafDocs = new int[maxPointsInLeafNode];
final byte[] leafValues = new byte[config.maxPointsInLeafNode * config.packedBytesLength];
final int[] leafDocs = new int[config.maxPointsInLeafNode];
long valueCount;
int leafCount;
OneDimensionBKDWriter(IndexOutput out) {
if (numIndexDims != 1) {
throw new UnsupportedOperationException("numIndexDims must be 1 but got " + numIndexDims);
if (config.numIndexDims != 1) {
throw new UnsupportedOperationException("config.numIndexDims must be 1 but got " + config.numIndexDims);
}
if (pointCount != 0) {
throw new IllegalStateException("cannot mix add and merge");
@ -384,7 +338,7 @@ final class SimpleTextBKDWriter implements Closeable {
this.out = out;
lastPackedValue = new byte[packedBytesLength];
lastPackedValue = new byte[config.packedBytesLength];
}
// for asserts
@ -395,7 +349,7 @@ final class SimpleTextBKDWriter implements Closeable {
assert valueInOrder(valueCount + leafCount,
0, lastPackedValue, packedValue, 0, docID, lastDocID);
System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
System.arraycopy(packedValue, 0, leafValues, leafCount * config.packedBytesLength, config.packedBytesLength);
leafDocs[leafCount] = docID;
docsSeen.set(docID);
leafCount++;
@ -404,7 +358,7 @@ final class SimpleTextBKDWriter implements Closeable {
throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
}
if (leafCount == maxPointsInLeafNode) {
if (leafCount == config.maxPointsInLeafNode) {
// We write a block once we hit exactly the max count ... this is different from
// when we flush a new segment, where we write between max/2 and max per leaf block,
// so merged segments will behave differently from newly flushed segments:
@ -433,7 +387,7 @@ final class SimpleTextBKDWriter implements Closeable {
//System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts=" + leafBlockStartValues.size());
byte[] index = new byte[(1+numInnerNodes) * (1+bytesPerDim)];
byte[] index = new byte[(1+numInnerNodes) * (1+config.bytesPerDim)];
rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
long[] arr = new long[leafBlockFPs.size()];
for(int i=0;i<leafBlockFPs.size();i++) {
@ -446,24 +400,24 @@ final class SimpleTextBKDWriter implements Closeable {
private void writeLeafBlock() throws IOException {
assert leafCount != 0;
if (valueCount == 0) {
System.arraycopy(leafValues, 0, minPackedValue, 0, packedIndexBytesLength);
System.arraycopy(leafValues, 0, minPackedValue, 0, config.packedIndexBytesLength);
}
System.arraycopy(leafValues, (leafCount - 1) * packedBytesLength, maxPackedValue, 0, packedIndexBytesLength);
System.arraycopy(leafValues, (leafCount - 1) * config.packedBytesLength, maxPackedValue, 0, config.packedIndexBytesLength);
valueCount += leafCount;
if (leafBlockFPs.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split value index in the end:
leafBlockStartValues.add(ArrayUtil.copyOfSubArray(leafValues, 0, packedBytesLength));
leafBlockStartValues.add(ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength));
}
leafBlockFPs.add(out.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
Arrays.fill(commonPrefixLengths, bytesPerDim);
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
// Find per-dim common prefix:
for(int dim=0;dim<numDataDims;dim++) {
int offset1 = dim * bytesPerDim;
int offset2 = (leafCount - 1) * packedBytesLength + offset1;
for(int dim=0;dim<config.numDims;dim++) {
int offset1 = dim * config.bytesPerDim;
int offset2 = (leafCount - 1) * config.packedBytesLength + offset1;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (leafValues[offset1+j] != leafValues[offset2+j]) {
commonPrefixLengths[dim] = j;
@ -478,18 +432,18 @@ final class SimpleTextBKDWriter implements Closeable {
final BytesRef scratch = new BytesRef();
{
scratch.length = packedBytesLength;
scratch.length = config.packedBytesLength;
scratch.bytes = leafValues;
}
@Override
public BytesRef apply(int i) {
scratch.offset = packedBytesLength * i;
scratch.offset = config.packedBytesLength * i;
return scratch;
}
};
assert valuesInOrderAndBounds(leafCount, 0, ArrayUtil.copyOfSubArray(leafValues, 0, packedBytesLength),
ArrayUtil.copyOfSubArray(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
assert valuesInOrderAndBounds(leafCount, 0, ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength),
ArrayUtil.copyOfSubArray(leafValues, (leafCount - 1) * config.packedBytesLength, leafCount * config.packedBytesLength),
packedValues, leafDocs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
}
@ -498,12 +452,12 @@ final class SimpleTextBKDWriter implements Closeable {
// TODO: there must be a simpler way?
private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
//System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length);
//System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + config.bytesPerDim + " index.length=" + index.length);
if (count == 1) {
// Leaf index node
//System.out.println(" leaf index node");
//System.out.println(" index[" + nodeID + "] = blockStartValues[" + offset + "]");
System.arraycopy(leafBlockStartValues.get(offset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim);
System.arraycopy(leafBlockStartValues.get(offset), 0, index, nodeID*(1+config.bytesPerDim)+1, config.bytesPerDim);
} else if (count > 1) {
// Internal index node: binary partition of count
int countAtLevel = 1;
@ -524,7 +478,7 @@ final class SimpleTextBKDWriter implements Closeable {
System.out.println(" rootOffset=" + rootOffset);
*/
System.arraycopy(leafBlockStartValues.get(rootOffset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim);
System.arraycopy(leafBlockStartValues.get(rootOffset), 0, index, nodeID*(1+config.bytesPerDim)+1, config.bytesPerDim);
//System.out.println(" index[" + nodeID + "] = blockStartValues[" + rootOffset + "]");
// TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree
@ -546,8 +500,8 @@ final class SimpleTextBKDWriter implements Closeable {
}
private void checkMaxLeafNodeCount(int numLeaves) {
if ((1+bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
if ((1+config.bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalStateException("too many nodes; increase config.maxPointsInLeafNode (currently " + config.maxPointsInLeafNode + ") and reindex");
}
}
@ -580,7 +534,7 @@ final class SimpleTextBKDWriter implements Closeable {
long countPerLeaf = pointCount;
long innerNodeCount = 1;
while (countPerLeaf > maxPointsInLeafNode) {
while (countPerLeaf > config.maxPointsInLeafNode) {
countPerLeaf = (countPerLeaf+1)/2;
innerNodeCount *= 2;
}
@ -593,16 +547,16 @@ final class SimpleTextBKDWriter implements Closeable {
// step of the recursion to recompute the split dim:
// Indexed by nodeID, but first (root) nodeID is 1. We do 1+ because the lead byte at each recursion says which dim we split on.
byte[] splitPackedValues = new byte[Math.toIntExact(numLeaves*(1+bytesPerDim))];
byte[] splitPackedValues = new byte[Math.toIntExact(numLeaves*(1+config.bytesPerDim))];
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
long[] leafBlockFPs = new long[numLeaves];
// Make sure the math above "worked":
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
assert pointCount / numLeaves <= config.maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " config.maxPointsInLeafNode=" + config.maxPointsInLeafNode;
//We re-use the selector so we do not need to create an object every time.
BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
BKDRadixSelector radixSelector = new BKDRadixSelector(config, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
boolean success = false;
try {
@ -610,7 +564,7 @@ final class SimpleTextBKDWriter implements Closeable {
build(1, numLeaves, points, out,
radixSelector, minPackedValue, maxPackedValue,
splitPackedValues, leafBlockFPs, new int[maxPointsInLeafNode]);
splitPackedValues, leafBlockFPs, new int[config.maxPointsInLeafNode]);
// If no exception, we should have cleaned everything up:
@ -636,19 +590,19 @@ final class SimpleTextBKDWriter implements Closeable {
/** Subclass can change how it writes the index. */
private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
write(out, NUM_DATA_DIMS);
writeInt(out, numDataDims);
writeInt(out, config.numDims);
newline(out);
write(out, NUM_INDEX_DIMS);
writeInt(out, numIndexDims);
writeInt(out, config.numIndexDims);
newline(out);
write(out, BYTES_PER_DIM);
writeInt(out, bytesPerDim);
writeInt(out, config.bytesPerDim);
newline(out);
write(out, MAX_LEAF_POINTS);
writeInt(out, maxPointsInLeafNode);
writeInt(out, config.maxPointsInLeafNode);
newline(out);
write(out, INDEX_COUNT);
@ -679,8 +633,8 @@ final class SimpleTextBKDWriter implements Closeable {
newline(out);
}
assert (splitPackedValues.length % (1 + bytesPerDim)) == 0;
int count = splitPackedValues.length / (1 + bytesPerDim);
assert (splitPackedValues.length % (1 + config.bytesPerDim)) == 0;
int count = splitPackedValues.length / (1 + config.bytesPerDim);
assert count == leafBlockFPs.length;
write(out, SPLIT_COUNT);
@ -689,10 +643,10 @@ final class SimpleTextBKDWriter implements Closeable {
for(int i=0;i<count;i++) {
write(out, SPLIT_DIM);
writeInt(out, splitPackedValues[i * (1 + bytesPerDim)] & 0xff);
writeInt(out, splitPackedValues[i * (1 + config.bytesPerDim)] & 0xff);
newline(out);
write(out, SPLIT_VALUE);
br = new BytesRef(splitPackedValues, 1+(i * (1+bytesPerDim)), bytesPerDim);
br = new BytesRef(splitPackedValues, 1+(i * (1+config.bytesPerDim)), config.bytesPerDim);
write(out, br.toString());
newline(out);
}
@ -722,11 +676,11 @@ final class SimpleTextBKDWriter implements Closeable {
private void writeLeafBlockPackedValuesRange(IndexOutput out, int[] commonPrefixLengths, int start, int end, IntFunction<BytesRef> packedValues) throws IOException {
for (int i = start; i < end; ++i) {
BytesRef ref = packedValues.apply(i);
assert ref.length == packedBytesLength;
assert ref.length == config.packedBytesLength;
for(int dim=0;dim<numDataDims;dim++) {
for(int dim=0;dim<config.numDims;dim++) {
int prefix = commonPrefixLengths[dim];
out.writeBytes(ref.bytes, ref.offset + dim*bytesPerDim + prefix, bytesPerDim-prefix);
out.writeBytes(ref.bytes, ref.offset + dim*config.bytesPerDim + prefix, config.bytesPerDim-prefix);
}
}
}
@ -780,12 +734,12 @@ final class SimpleTextBKDWriter implements Closeable {
/** Called only in assert */
private boolean valueInBounds(BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue) {
for(int dim=0;dim<numIndexDims;dim++) {
int offset = bytesPerDim*dim;
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDim, minPackedValue, offset, offset + bytesPerDim) < 0) {
for(int dim=0;dim<config.numIndexDims;dim++) {
int offset = config.bytesPerDim*dim;
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + config.bytesPerDim, minPackedValue, offset, offset + config.bytesPerDim) < 0) {
return false;
}
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDim, maxPackedValue, offset, offset + bytesPerDim) > 0) {
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + config.bytesPerDim, maxPackedValue, offset, offset + config.bytesPerDim) > 0) {
return false;
}
}
@ -796,10 +750,10 @@ final class SimpleTextBKDWriter implements Closeable {
protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
// Find which dim has the largest span so we can split on it:
int splitDim = -1;
for(int dim=0;dim<numIndexDims;dim++) {
NumericUtils.subtract(bytesPerDim, dim, maxPackedValue, minPackedValue, scratchDiff);
if (splitDim == -1 || Arrays.compareUnsigned(scratchDiff, 0, bytesPerDim, scratch1, 0, bytesPerDim) > 0) {
System.arraycopy(scratchDiff, 0, scratch1, 0, bytesPerDim);
for(int dim=0;dim<config.numIndexDims;dim++) {
NumericUtils.subtract(config.bytesPerDim, dim, maxPackedValue, minPackedValue, scratchDiff);
if (splitDim == -1 || Arrays.compareUnsigned(scratchDiff, 0, config.bytesPerDim, scratch1, 0, config.bytesPerDim) > 0) {
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim);
splitDim = dim;
}
}
@ -812,7 +766,7 @@ final class SimpleTextBKDWriter implements Closeable {
private HeapPointWriter switchToHeap(PointWriter source) throws IOException {
int count = Math.toIntExact(source.count());
try (PointReader reader = source.getReader(0, count);
HeapPointWriter writer = new HeapPointWriter(count, packedBytesLength)) {
HeapPointWriter writer = new HeapPointWriter(config, count)) {
for(int i=0;i<count;i++) {
boolean hasNext = reader.next();
assert hasNext;
@ -836,15 +790,15 @@ final class SimpleTextBKDWriter implements Closeable {
if (nodeID >= leafNodeOffset) {
// leaf node
final int count = to - from;
assert count <= maxPointsInLeafNode;
assert count <= config.maxPointsInLeafNode;
// Compute common prefixes
Arrays.fill(commonPrefixLengths, bytesPerDim);
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
reader.getValue(from, scratchBytesRef1);
for (int i = from + 1; i < to; ++i) {
reader.getValue(i, scratchBytesRef2);
for (int dim=0;dim<numDataDims;dim++) {
final int offset = dim * bytesPerDim;
for (int dim=0;dim<config.numDims;dim++) {
final int offset = dim * config.bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) {
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
commonPrefixLengths[dim] = j;
@ -855,23 +809,23 @@ final class SimpleTextBKDWriter implements Closeable {
}
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
FixedBitSet[] usedBytes = new FixedBitSet[numDataDims];
for (int dim = 0; dim < numDataDims; ++dim) {
if (commonPrefixLengths[dim] < bytesPerDim) {
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
for (int dim = 0; dim < config.numDims; ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim) {
usedBytes[dim] = new FixedBitSet(256);
}
}
for (int i = from + 1; i < to; ++i) {
for (int dim=0;dim<numDataDims;dim++) {
for (int dim=0;dim<config.numDims;dim++) {
if (usedBytes[dim] != null) {
byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
byte b = reader.getByteAt(i, dim * config.bytesPerDim + commonPrefixLengths[dim]);
usedBytes[dim].set(Byte.toUnsignedInt(b));
}
}
}
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
for (int dim = 0; dim < numDataDims; ++dim) {
for (int dim = 0; dim < config.numDims; ++dim) {
if (usedBytes[dim] != null) {
final int cardinality = usedBytes[dim].cardinality();
if (cardinality < sortedDimCardinality) {
@ -882,7 +836,7 @@ final class SimpleTextBKDWriter implements Closeable {
}
// sort by sortedDim
MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
MutablePointsReaderUtils.sortByDim(config, sortedDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer:
@ -897,7 +851,7 @@ final class SimpleTextBKDWriter implements Closeable {
// Write the common prefixes:
reader.getValue(from, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength);
// Write the full values:
IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
@ -918,28 +872,28 @@ final class SimpleTextBKDWriter implements Closeable {
final int splitDim = split(minPackedValue, maxPackedValue);
final int mid = (from + to + 1) >>> 1;
int commonPrefixLen = bytesPerDim;
for (int i = 0; i < bytesPerDim; ++i) {
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
int commonPrefixLen = config.bytesPerDim;
for (int i = 0; i < config.bytesPerDim; ++i) {
if (minPackedValue[splitDim * config.bytesPerDim + i] != maxPackedValue[splitDim * config.bytesPerDim + i]) {
commonPrefixLen = i;
break;
}
}
MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
MutablePointsReaderUtils.partition(config, maxDoc, splitDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value
final int address = nodeID * (1+bytesPerDim);
final int address = nodeID * (1+config.bytesPerDim);
splitPackedValues[address] = (byte) splitDim;
reader.getValue(mid, scratchBytesRef1);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * config.bytesPerDim, splitPackedValues, address + 1, config.bytesPerDim);
byte[] minSplitPackedValue = ArrayUtil.copyOfSubArray(minPackedValue, 0, packedIndexBytesLength);
byte[] maxSplitPackedValue = ArrayUtil.copyOfSubArray(maxPackedValue, 0, packedIndexBytesLength);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
byte[] minSplitPackedValue = ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxSplitPackedValue = ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * config.bytesPerDim,
minSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * config.bytesPerDim,
maxSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
// recurse
build(nodeID * 2, leafNodeOffset, reader, from, mid, out,
@ -980,17 +934,17 @@ final class SimpleTextBKDWriter implements Closeable {
int sortedDim = 0;
int sortedDimCardinality = Integer.MAX_VALUE;
FixedBitSet[] usedBytes = new FixedBitSet[numDataDims];
for (int dim = 0; dim < numDataDims; ++dim) {
if (commonPrefixLengths[dim] < bytesPerDim) {
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
for (int dim = 0; dim < config.numDims; ++dim) {
if (commonPrefixLengths[dim] < config.bytesPerDim) {
usedBytes[dim] = new FixedBitSet(256);
}
}
//Find the dimension to compress
for (int dim = 0; dim < numDataDims; dim++) {
for (int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
if (prefix < bytesPerDim) {
int offset = dim * bytesPerDim;
if (prefix < config.bytesPerDim) {
int offset = dim * config.bytesPerDim;
for (int i = 0; i < heapSource.count(); ++i) {
PointValue value = heapSource.getPackedValueSlice(i);
BytesRef packedValue = value.packedValue();
@ -1031,7 +985,7 @@ final class SimpleTextBKDWriter implements Closeable {
final BytesRef scratch = new BytesRef();
{
scratch.length = packedBytesLength;
scratch.length = config.packedBytesLength;
}
@Override
@ -1048,7 +1002,7 @@ final class SimpleTextBKDWriter implements Closeable {
// Inner node: partition/recurse
int splitDim;
if (numIndexDims > 1) {
if (config.numIndexDims > 1) {
splitDim = split(minPackedValue, maxPackedValue);
} else {
splitDim = 0;
@ -1060,29 +1014,29 @@ final class SimpleTextBKDWriter implements Closeable {
long rightCount = points.count / 2;
long leftCount = points.count - rightCount;
int commonPrefixLen = Arrays.mismatch(minPackedValue, splitDim * bytesPerDim,
splitDim * bytesPerDim + bytesPerDim, maxPackedValue, splitDim * bytesPerDim,
splitDim * bytesPerDim + bytesPerDim);
int commonPrefixLen = Arrays.mismatch(minPackedValue, splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim, maxPackedValue, splitDim * config.bytesPerDim,
splitDim * config.bytesPerDim + config.bytesPerDim);
if (commonPrefixLen == -1) {
commonPrefixLen = bytesPerDim;
commonPrefixLen = config.bytesPerDim;
}
BKDRadixSelector.PathSlice[] pathSlices = new BKDRadixSelector.PathSlice[2];
byte[] splitValue = radixSelector.select(points, pathSlices, points.start, points.start + points.count, points.start + leftCount, splitDim, commonPrefixLen);
int address = nodeID * (1 + bytesPerDim);
int address = nodeID * (1 + config.bytesPerDim);
splitPackedValues[address] = (byte) splitDim;
System.arraycopy(splitValue, 0, splitPackedValues, address + 1, bytesPerDim);
System.arraycopy(splitValue, 0, splitPackedValues, address + 1, config.bytesPerDim);
byte[] minSplitPackedValue = new byte[packedIndexBytesLength];
System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedIndexBytesLength);
byte[] minSplitPackedValue = new byte[config.packedIndexBytesLength];
System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, config.packedIndexBytesLength);
byte[] maxSplitPackedValue = new byte[packedIndexBytesLength];
System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, packedIndexBytesLength);
byte[] maxSplitPackedValue = new byte[config.packedIndexBytesLength];
System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitValue, 0, minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(splitValue, 0, maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
System.arraycopy(splitValue, 0, minSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
System.arraycopy(splitValue, 0, maxSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
// Recurse on left tree:
build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector,
@ -1096,18 +1050,18 @@ final class SimpleTextBKDWriter implements Closeable {
}
private void computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix) {
Arrays.fill(commonPrefixLengths, bytesPerDim);
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
PointValue value = heapPointWriter.getPackedValueSlice(0);
BytesRef packedValue = value.packedValue();
for (int dim = 0; dim < numDataDims; dim++) {
System.arraycopy(packedValue.bytes, packedValue.offset + dim * bytesPerDim, commonPrefix, dim * bytesPerDim, bytesPerDim);
for (int dim = 0; dim < config.numDims; dim++) {
System.arraycopy(packedValue.bytes, packedValue.offset + dim * config.bytesPerDim, commonPrefix, dim * config.bytesPerDim, config.bytesPerDim);
}
for (int i = 1; i < heapPointWriter.count(); i++) {
value = heapPointWriter.getPackedValueSlice(i);
packedValue = value.packedValue();
for (int dim = 0; dim < numDataDims; dim++) {
for (int dim = 0; dim < config.numDims; dim++) {
if (commonPrefixLengths[dim] != 0) {
int j = Arrays.mismatch(commonPrefix, dim * bytesPerDim, dim * bytesPerDim + commonPrefixLengths[dim], packedValue.bytes, packedValue.offset + dim * bytesPerDim, packedValue.offset + dim * bytesPerDim + commonPrefixLengths[dim]);
int j = Arrays.mismatch(commonPrefix, dim * config.bytesPerDim, dim * config.bytesPerDim + commonPrefixLengths[dim], packedValue.bytes, packedValue.offset + dim * config.bytesPerDim, packedValue.offset + dim * config.bytesPerDim + commonPrefixLengths[dim]);
if (j != -1) {
commonPrefixLengths[dim] = j;
}
@ -1119,11 +1073,11 @@ final class SimpleTextBKDWriter implements Closeable {
// only called from assert
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue,
IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException {
byte[] lastPackedValue = new byte[packedBytesLength];
byte[] lastPackedValue = new byte[config.packedBytesLength];
int lastDoc = -1;
for (int i=0;i<count;i++) {
BytesRef packedValue = values.apply(i);
assert packedValue.length == packedBytesLength;
assert packedValue.length == config.packedBytesLength;
assert valueInOrder(i, sortedDim, lastPackedValue, packedValue.bytes, packedValue.offset,
docs[docsOffset + i], lastDoc);
lastDoc = docs[docsOffset + i];
@ -1137,24 +1091,24 @@ final class SimpleTextBKDWriter implements Closeable {
// only called from assert
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset,
int doc, int lastDoc) {
int dimOffset = sortedDim * bytesPerDim;
int dimOffset = sortedDim * config.bytesPerDim;
if (ord > 0) {
int cmp = Arrays.compareUnsigned(lastPackedValue, dimOffset, dimOffset + bytesPerDim, packedValue, packedValueOffset + dimOffset, packedValueOffset + dimOffset + bytesPerDim);
int cmp = Arrays.compareUnsigned(lastPackedValue, dimOffset, dimOffset + config.bytesPerDim, packedValue, packedValueOffset + dimOffset, packedValueOffset + dimOffset + config.bytesPerDim);
if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim);
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, config.packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim);
}
if (cmp == 0 && numDataDims > numIndexDims) {
int dataOffset = numIndexDims * bytesPerDim;
cmp = Arrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
if (cmp == 0 && config.numDims > config.numIndexDims) {
int dataOffset = config.numIndexDims * config.bytesPerDim;
cmp = Arrays.compareUnsigned(lastPackedValue, dataOffset, config.packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + config.packedBytesLength);
if (cmp > 0) {
throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, config.packedBytesLength) + " ord=" + ord);
}
}
if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim);
}
}
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength);
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, config.packedBytesLength);
return true;
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.bkd.BKDConfig;
class SimpleTextPointsWriter extends PointsWriter {
@ -72,14 +73,17 @@ class SimpleTextPointsWriter extends PointsWriter {
PointValues values = reader.getValues(fieldInfo.name);
BKDConfig config = new BKDConfig(fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
// We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk:
try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
config,
SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
values.size())) {

View File

@ -36,6 +36,7 @@ import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDReader;
import org.apache.lucene.util.bkd.BKDWriter;
@ -98,7 +99,7 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable {
/** Uses the default values for {@code maxPointsInLeafNode} (1024) and {@code maxMBSortInHeap} (16.0) */
public Lucene86PointsWriter(SegmentWriteState writeState) throws IOException {
this(writeState, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
this(writeState, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
}
@Override
@ -106,13 +107,15 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable {
PointValues values = reader.getValues(fieldInfo.name);
BKDConfig config = new BKDConfig(fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode);
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
config,
maxMBSortInHeap,
values.size())) {
@ -189,6 +192,11 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable {
}
}
BKDConfig config = new BKDConfig(fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode);
//System.out.println("MERGE: field=" + fieldInfo.name);
// Optimize the 1D case to use BKDWriter.merge, which does a single merge sort of the
// already sorted incoming segments, instead of trying to sort all points again as if
@ -196,10 +204,7 @@ public class Lucene86PointsWriter extends PointsWriter implements Closeable {
try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
config,
maxMBSortInHeap,
totMaxSize)) {
List<BKDReader> bkdReaders = new ArrayList<>();

View File

@ -29,7 +29,7 @@ import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LatLonPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.bkd.BKDWriter;
import org.apache.lucene.util.bkd.BKDConfig;
/**
* Access to indexed numeric values.
@ -87,10 +87,10 @@ public abstract class PointValues {
public static final int MAX_NUM_BYTES = 16;
/** Maximum number of dimensions */
public static final int MAX_DIMENSIONS = BKDWriter.MAX_DIMS;
public static final int MAX_DIMENSIONS = BKDConfig.MAX_DIMS;
/** Maximum number of index dimensions */
public static final int MAX_INDEX_DIMENSIONS = BKDWriter.MAX_INDEX_DIMS;
public static final int MAX_INDEX_DIMENSIONS = BKDConfig.MAX_INDEX_DIMS;
/** Return the cumulated number of points across all leaves of the given
* {@link IndexReader}. Leaves that do not have points for the given field

View File

@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util.bkd;
import org.apache.lucene.util.ArrayUtil;
/**
* Basic parameters for indexing points on the BKD tree.
*/
public final class BKDConfig {
/** Default maximum number of points in each leaf block */
public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 512;
/** Maximum number of dimensions (2 * max index dimensions) */
public static final int MAX_DIMS = 16;
/** Maximum number of index dimensions */
public static final int MAX_INDEX_DIMS = 8;
/** How many dimensions we are storing at the leaf (data) nodes */
public final int numDims;
/** How many dimensions we are indexing in the internal nodes */
public final int numIndexDims;
/** How many bytes each value in each dimension takes. */
public final int bytesPerDim;
/** Maximum number of points allowed in a leaf block */
public final int maxPointsInLeafNode;
/** numDataDims * bytesPerDim */
public final int packedBytesLength;
/** numIndexDims * bytesPerDim */
public final int packedIndexBytesLength;
/** packedBytesLength plus docID size */
public final int bytesPerDoc;
public BKDConfig(final int numDims, final int numIndexDims, final int bytesPerDim, final int maxPointsInLeafNode) {
verifyParams(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
this.numDims = numDims;
this.numIndexDims = numIndexDims;
this.bytesPerDim = bytesPerDim;
this.maxPointsInLeafNode = maxPointsInLeafNode;
this.packedIndexBytesLength = numIndexDims * bytesPerDim;
this.packedBytesLength = numDims * bytesPerDim;
// dimensional values (numDims * bytesPerDim) + docID (int)
this.bytesPerDoc = this.packedBytesLength + Integer.BYTES;
}
private static void verifyParams(final int numDims, final int numIndexDims, final int bytesPerDim, final int maxPointsInLeafNode) {
// Check inputs are on bounds
if (numDims < 1 || numDims > MAX_DIMS) {
throw new IllegalArgumentException("numDims must be 1 .. " + MAX_DIMS + " (got: " + numDims + ")");
}
if (numIndexDims < 1 || numIndexDims > MAX_INDEX_DIMS) {
throw new IllegalArgumentException("numIndexDims must be 1 .. " + MAX_INDEX_DIMS + " (got: " + numIndexDims + ")");
}
if (numIndexDims > numDims) {
throw new IllegalArgumentException("numIndexDims cannot exceed numDims (" + numDims + ") (got: " + numIndexDims + ")");
}
if (bytesPerDim <= 0) {
throw new IllegalArgumentException("bytesPerDim must be > 0; got " + bytesPerDim);
}
if (maxPointsInLeafNode <= 0) {
throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
}
if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
}
}
}
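
A minimal usage sketch of the new class (the 2-dimension, 4-bytes-per-dimension geometry is an assumption for illustration, not taken from this diff). Parameter validation now happens once in verifyParams at construction time instead of inside each writer:

  BKDConfig config = new BKDConfig(/* numDims */ 2,
                                   /* numIndexDims */ 2,
                                   /* bytesPerDim */ Integer.BYTES,
                                   BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
  // Derived sizes are computed once and shared by the point writers, radix selector and sort utilities touched in this commit:
  // config.packedBytesLength      == 2 * 4 == 8
  // config.packedIndexBytesLength == 2 * 4 == 8
  // config.bytesPerDoc            == 8 + Integer.BYTES == 12
  // An invalid geometry fails fast, e.g. new BKDConfig(0, 1, 4, 512)
  // throws IllegalArgumentException("numDims must be 1 .. 16 (got: 0)").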

View File

@ -35,57 +35,46 @@ import org.apache.lucene.util.Sorter;
* @lucene.internal
* */
public final class BKDRadixSelector {
//size of the histogram
// size of the histogram
private static final int HISTOGRAM_SIZE = 256;
//size of the online buffer: 8 KB
// size of the offline buffer: 8 KB
private static final int MAX_SIZE_OFFLINE_BUFFER = 1024 * 8;
//histogram array
// histogram array
private final long[] histogram;
//bytes per dimension
private final int bytesPerDim;
// number of bytes to be sorted: bytesPerDim + Integer.BYTES
// number of bytes to be sorted: config.bytesPerDim + Integer.BYTES
private final int bytesSorted;
//data dimensions size
private final int packedBytesLength;
// data dimensions plus docID size
private final int packedBytesDocIDLength;
//flag to when we are moving to sort on heap
// flag to when we are moving to sort on heap
private final int maxPointsSortInHeap;
//reusable buffer
// reusable buffer
private final byte[] offlineBuffer;
//holder for partition points
// holder for partition points
private final int[] partitionBucket;
// scratch array to hold temporary data
private final byte[] scratch;
//Directory to create new Offline writer
// Directory to create new Offline writer
private final Directory tempDir;
// prefix for temp files
private final String tempFileNamePrefix;
// data and index dimensions
private final int numDataDims, numIndexDims;
// BKD tree configuration
private final BKDConfig config;
/**
* Sole constructor.
*/
public BKDRadixSelector(int numDataDims, int numIndexDims, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
this.bytesPerDim = bytesPerDim;
this.numDataDims = numDataDims;
this.numIndexDims = numIndexDims;
this.packedBytesLength = numDataDims * bytesPerDim;
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
public BKDRadixSelector(BKDConfig config, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
this.config = config;
this.maxPointsSortInHeap = maxPointsSortInHeap;
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
// Selection and sorting is done in a given dimension. If two points have equal values in that dimension,
// we tie-break first using the data-only dimensions, and if those are still equal
// we tie-break on the docID. Here we account for all bytes used in the process.
this.bytesSorted = bytesPerDim + (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
this.maxPointsSortInHeap = maxPointsSortInHeap;
int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / packedBytesDocIDLength;
this.offlineBuffer = new byte[numberOfPointsOffline * packedBytesDocIDLength];
this.bytesSorted = config.bytesPerDim + (config.numDims - config.numIndexDims) * config.bytesPerDim + Integer.BYTES;
final int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / config.bytesPerDoc;
this.offlineBuffer = new byte[numberOfPointsOffline * config.bytesPerDoc];
this.partitionBucket = new int[bytesSorted];
this.histogram = new long[HISTOGRAM_SIZE];
this.scratch = new byte[bytesSorted];
this.tempDir = tempDir;
this.tempFileNamePrefix = tempFileNamePrefix;
}
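
To make the bytesSorted accounting above concrete, a worked example under an assumed geometry of 3 data dimensions, 2 index dimensions and 4 bytes per dimension (illustrative values, not from this diff):

  BKDConfig config = new BKDConfig(3, 2, 4, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
  // bytesSorted = bytesPerDim                                  (the dimension being selected on)
  //             + (numDims - numIndexDims) * bytesPerDim       (data-only dimensions, first tie-break)
  //             + Integer.BYTES                                (docID, final tie-break)
  //             = 4 + 1 * 4 + 4 = 12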
/**
@ -108,7 +97,7 @@ public final class BKDRadixSelector {
assert partitionSlices.length > 1 : "[partition slices] must be > 1, got " + partitionSlices.length;
//If we are on heap then we just select on heap
// If we are on heap then we just select on heap
if (points.writer instanceof HeapPointWriter) {
byte[] partition = heapRadixSelect((HeapPointWriter) points.writer, dim, Math.toIntExact(from), Math.toIntExact(to), Math.toIntExact(partitionPoint), dimCommonPrefix);
partitionSlices[0] = new PathSlice(points.writer, from, partitionPoint - from);
@ -136,18 +125,18 @@ public final class BKDRadixSelector {
}
private int findCommonPrefixAndHistogram(OfflinePointWriter points, long from, long to, int dim, int dimCommonPrefix) throws IOException{
//find common prefix
// find common prefix
int commonPrefixPosition = bytesSorted;
final int offset = dim * bytesPerDim;
final int offset = dim * config.bytesPerDim;
try (OfflinePointReader reader = points.getReader(from, to - from, offlineBuffer)) {
assert commonPrefixPosition > dimCommonPrefix;
reader.next();
PointValue pointValue = reader.pointValue();
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
// copy dimension
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, bytesPerDim);
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, config.bytesPerDim);
// copy data dimensions and docID
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + numIndexDims * bytesPerDim, scratch, bytesPerDim, (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES);
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + config.packedIndexBytesLength, scratch, config.bytesPerDim, (config.numDims - config.numIndexDims) * config.bytesPerDim + Integer.BYTES);
for (long i = from + 1; i < to; i++) {
reader.next();
@ -163,20 +152,20 @@ public final class BKDRadixSelector {
}
break;
} else {
//check common prefix and adjust histogram
final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix;
final int endIndex = (commonPrefixPosition > bytesPerDim) ? bytesPerDim : commonPrefixPosition;
// Check common prefix and adjust histogram
final int startIndex = (dimCommonPrefix > config.bytesPerDim) ? config.bytesPerDim : dimCommonPrefix;
final int endIndex = (commonPrefixPosition > config.bytesPerDim) ? config.bytesPerDim : commonPrefixPosition;
packedValueDocID = pointValue.packedValueDocIDBytes();
int j = Arrays.mismatch(scratch, startIndex, endIndex, packedValueDocID.bytes, packedValueDocID.offset + offset + startIndex, packedValueDocID.offset + offset + endIndex);
if (j == -1) {
if (commonPrefixPosition > bytesPerDim) {
//tie-break on data dimensions + docID
final int startTieBreak = numIndexDims * bytesPerDim;
final int endTieBreak = startTieBreak + commonPrefixPosition - bytesPerDim;
int k = Arrays.mismatch(scratch, bytesPerDim, commonPrefixPosition,
if (commonPrefixPosition > config.bytesPerDim) {
// Tie-break on data dimensions + docID
final int startTieBreak = config.packedIndexBytesLength;
final int endTieBreak = startTieBreak + commonPrefixPosition - config.bytesPerDim;
int k = Arrays.mismatch(scratch, config.bytesPerDim, commonPrefixPosition,
packedValueDocID.bytes, packedValueDocID.offset + startTieBreak, packedValueDocID.offset + endTieBreak);
if (k != -1) {
commonPrefixPosition = bytesPerDim + k;
commonPrefixPosition = config.bytesPerDim + k;
Arrays.fill(histogram, 0);
histogram[scratch[commonPrefixPosition] & 0xff] = i - from;
}
@ -193,7 +182,7 @@ public final class BKDRadixSelector {
}
}
//build partition buckets up to commonPrefix
// Build partition buckets up to commonPrefix
for (int i = 0; i < commonPrefixPosition; i++) {
partitionBucket[i] = scratch[i] & 0xff;
}
@ -202,22 +191,22 @@ public final class BKDRadixSelector {
private int getBucket(int offset, int commonPrefixPosition, PointValue pointValue) {
int bucket;
if (commonPrefixPosition < bytesPerDim) {
if (commonPrefixPosition < config.bytesPerDim) {
BytesRef packedValue = pointValue.packedValue();
bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff;
} else {
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
bucket = packedValueDocID.bytes[packedValueDocID.offset + numIndexDims * bytesPerDim + commonPrefixPosition - bytesPerDim] & 0xff;
bucket = packedValueDocID.bytes[packedValueDocID.offset + config.packedIndexBytesLength + commonPrefixPosition - config.bytesPerDim] & 0xff;
}
return bucket;
}
private byte[] buildHistogramAndPartition(OfflinePointWriter points, PointWriter left, PointWriter right,
long from, long to, long partitionPoint, int iteration, int baseCommonPrefix, int dim) throws IOException {
//find common prefix from baseCommonPrefix and build histogram
// Find common prefix from baseCommonPrefix and build histogram
int commonPrefix = findCommonPrefixAndHistogram(points, from, to, dim, baseCommonPrefix);
//if all equals we just partition the points
// If all equals we just partition the points
if (commonPrefix == bytesSorted) {
offlinePartition(points, left, right, null, from, to, dim, commonPrefix - 1, partitionPoint);
return partitionPointFromCommonPrefix();
@ -226,7 +215,7 @@ public final class BKDRadixSelector {
long leftCount = 0;
long rightCount = 0;
//Count left points and record the partition point
// Count left points and record the partition point
for(int i = 0; i < HISTOGRAM_SIZE; i++) {
long size = histogram[i];
if (leftCount + size > partitionPoint - from) {
@ -235,7 +224,7 @@ public final class BKDRadixSelector {
}
leftCount += size;
}
//Count right points
// Count right points
for(int i = partitionBucket[commonPrefix] + 1; i < HISTOGRAM_SIZE; i++) {
rightCount += histogram[i];
}
@ -243,17 +232,17 @@ public final class BKDRadixSelector {
long delta = histogram[partitionBucket[commonPrefix]];
assert leftCount + rightCount + delta == to - from : (leftCount + rightCount + delta) + " / " + (to - from);
//special case when points are equal except last byte, we can just tie-break
// Special case when points are equal except last byte, we can just tie-break
if (commonPrefix == bytesSorted - 1) {
long tieBreakCount =(partitionPoint - from - leftCount);
offlinePartition(points, left, right, null, from, to, dim, commonPrefix, tieBreakCount);
return partitionPointFromCommonPrefix();
}
//create the delta points writer
// Create the delta points writer
PointWriter deltaPoints;
try (PointWriter tempDeltaPoints = getDeltaPointWriter(left, right, delta, iteration)) {
//divide the points. This actually destroys the current writer
// Divide the points. This actually destroys the current writer
offlinePartition(points, left, right, tempDeltaPoints, from, to, dim, commonPrefix, 0);
deltaPoints = tempDeltaPoints;
}
@ -270,7 +259,7 @@ public final class BKDRadixSelector {
private void offlinePartition(OfflinePointWriter points, PointWriter left, PointWriter right, PointWriter deltaPoints,
long from, long to, int dim, int bytePosition, long numDocsTiebreak) throws IOException {
assert bytePosition == bytesSorted -1 || deltaPoints != null;
int offset = dim * bytesPerDim;
int offset = dim * config.bytesPerDim;
long tiebreakCounter = 0;
try (OfflinePointReader reader = points.getReader(from, to - from, offlineBuffer)) {
while (reader.next()) {
@ -296,13 +285,13 @@ public final class BKDRadixSelector {
}
}
}
//Delete original file
// Delete original file
points.destroy();
}
private byte[] partitionPointFromCommonPrefix() {
byte[] partition = new byte[bytesPerDim];
for (int i = 0; i < bytesPerDim; i++) {
byte[] partition = new byte[config.bytesPerDim];
for (int i = 0; i < config.bytesPerDim; i++) {
partition[i] = (byte)partitionBucket[i];
}
return partition;
@ -322,9 +311,9 @@ public final class BKDRadixSelector {
}
private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefixLength) {
final int dimOffset = dim * bytesPerDim + commonPrefixLength;
final int dimCmpBytes = bytesPerDim - commonPrefixLength;
final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
final int dimOffset = dim * config.bytesPerDim + commonPrefixLength;
final int dimCmpBytes = config.bytesPerDim - commonPrefixLength;
final int dataOffset = config.packedIndexBytesLength - dimCmpBytes;
new RadixSelector(bytesSorted - commonPrefixLength) {
@Override
@ -337,21 +326,20 @@ public final class BKDRadixSelector {
assert k >= 0 : "negative prefix " + k;
if (k < dimCmpBytes) {
// dim bytes
return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
return points.block[i * config.bytesPerDoc + dimOffset + k] & 0xff;
} else {
// data bytes
return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
return points.block[i * config.bytesPerDoc + dataOffset + k] & 0xff;
}
}
@Override
protected Selector getFallbackSelector(int d) {
final int skypedBytes = d + commonPrefixLength;
final int dimStart = dim * bytesPerDim + skypedBytes;
final int dimEnd = dim * bytesPerDim + bytesPerDim;
final int dataOffset = numIndexDims * bytesPerDim;
final int dimStart = dim * config.bytesPerDim + skypedBytes;
final int dimEnd = dim * config.bytesPerDim + config.bytesPerDim;
// data length is composed by the data dimensions plus the docID
final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
final int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim + Integer.BYTES;
return new IntroSelector() {
@Override
@ -361,55 +349,55 @@ public final class BKDRadixSelector {
@Override
protected void setPivot(int i) {
if (skypedBytes < bytesPerDim) {
System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
if (skypedBytes < config.bytesPerDim) {
System.arraycopy(points.block, i * config.bytesPerDoc + dim * config.bytesPerDim, scratch, 0, config.bytesPerDim);
}
System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
System.arraycopy(points.block, i * config.bytesPerDoc + config.packedIndexBytesLength, scratch, config.bytesPerDim, dataLength);
}
@Override
protected int compare(int i, int j) {
if (skypedBytes < bytesPerDim) {
int iOffset = i * packedBytesDocIDLength;
int jOffset = j * packedBytesDocIDLength;
if (skypedBytes < config.bytesPerDim) {
int iOffset = i * config.bytesPerDoc;
int jOffset = j * config.bytesPerDoc;
int cmp = Arrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
int iOffset = i * packedBytesDocIDLength + dataOffset;
int jOffset = j * packedBytesDocIDLength + dataOffset;
int iOffset = i * config.bytesPerDoc + config.packedIndexBytesLength;
int jOffset = j * config.bytesPerDoc + config.packedIndexBytesLength;
return Arrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
}
@Override
protected int comparePivot(int j) {
if (skypedBytes < bytesPerDim) {
int jOffset = j * packedBytesDocIDLength;
int cmp = Arrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
if (skypedBytes < config.bytesPerDim) {
int jOffset = j * config.bytesPerDoc;
int cmp = Arrays.compareUnsigned(scratch, skypedBytes, config.bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
int jOffset = j * packedBytesDocIDLength + dataOffset;
return Arrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
int jOffset = j * config.bytesPerDoc + config.packedIndexBytesLength;
return Arrays.compareUnsigned(scratch, config.bytesPerDim, config.bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
}
};
}
}.select(from, to, partitionPoint);
byte[] partition = new byte[bytesPerDim];
byte[] partition = new byte[config.bytesPerDim];
PointValue pointValue = points.getPackedValueSlice(partitionPoint);
BytesRef packedValue = pointValue.packedValue();
System.arraycopy(packedValue.bytes, packedValue.offset + dim * bytesPerDim, partition, 0, bytesPerDim);
System.arraycopy(packedValue.bytes, packedValue.offset + dim * config.bytesPerDim, partition, 0, config.bytesPerDim);
return partition;
}
/** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */
public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) {
final int dimOffset = dim * bytesPerDim + commonPrefixLength;
final int dimCmpBytes = bytesPerDim - commonPrefixLength;
final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
final int dimOffset = dim * config.bytesPerDim + commonPrefixLength;
final int dimCmpBytes = config.bytesPerDim - commonPrefixLength;
final int dataOffset = config.packedIndexBytesLength - dimCmpBytes;
new MSBRadixSorter(bytesSorted - commonPrefixLength) {
@Override
@ -417,10 +405,10 @@ public final class BKDRadixSelector {
assert k >= 0 : "negative prefix " + k;
if (k < dimCmpBytes) {
// dim bytes
return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
return points.block[i * config.bytesPerDoc + dimOffset + k] & 0xff;
} else {
// data bytes
return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
return points.block[i * config.bytesPerDoc + dataOffset + k] & 0xff;
}
}
@ -432,11 +420,10 @@ public final class BKDRadixSelector {
@Override
protected Sorter getFallbackSorter(int k) {
final int skypedBytes = k + commonPrefixLength;
final int dimStart = dim * bytesPerDim + skypedBytes;
final int dimEnd = dim * bytesPerDim + bytesPerDim;
final int dataOffset = numIndexDims * bytesPerDim;
final int dimStart = dim * config.bytesPerDim + skypedBytes;
final int dimEnd = dim * config.bytesPerDim + config.bytesPerDim;
// data length is composed by the data dimensions plus the docID
final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
final int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim + Integer.BYTES;
return new IntroSorter() {
@Override
@ -446,38 +433,38 @@ public final class BKDRadixSelector {
@Override
protected void setPivot(int i) {
if (skypedBytes < bytesPerDim) {
System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
if (skypedBytes < config.bytesPerDim) {
System.arraycopy(points.block, i * config.bytesPerDoc + dim * config.bytesPerDim, scratch, 0, config.bytesPerDim);
}
System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
System.arraycopy(points.block, i * config.bytesPerDoc + config.packedIndexBytesLength, scratch, config.bytesPerDim, dataLength);
}
@Override
protected int compare(int i, int j) {
if (skypedBytes < bytesPerDim) {
int iOffset = i * packedBytesDocIDLength;
int jOffset = j * packedBytesDocIDLength;
if (skypedBytes < config.bytesPerDim) {
int iOffset = i * config.bytesPerDoc;
int jOffset = j * config.bytesPerDoc;
int cmp = Arrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
int iOffset = i * packedBytesDocIDLength + dataOffset;
int jOffset = j * packedBytesDocIDLength + dataOffset;
int iOffset = i * config.bytesPerDoc + config.packedIndexBytesLength;
int jOffset = j * config.bytesPerDoc + config.packedIndexBytesLength;
return Arrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
}
@Override
protected int comparePivot(int j) {
if (skypedBytes < bytesPerDim) {
int jOffset = j * packedBytesDocIDLength;
int cmp = Arrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
if (skypedBytes < config.bytesPerDim) {
int jOffset = j * config.bytesPerDoc;
int cmp = Arrays.compareUnsigned(scratch, skypedBytes, config.bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
int jOffset = j * packedBytesDocIDLength + dataOffset;
return Arrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
int jOffset = j * config.bytesPerDoc + config.packedIndexBytesLength;
return Arrays.compareUnsigned(scratch, config.bytesPerDim, config.bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
}
};
}
@ -486,9 +473,9 @@ public final class BKDRadixSelector {
private PointWriter getDeltaPointWriter(PointWriter left, PointWriter right, long delta, int iteration) throws IOException {
if (delta <= getMaxPointsSortInHeap(left, right)) {
return new HeapPointWriter(Math.toIntExact(delta), packedBytesLength);
return new HeapPointWriter(config, Math.toIntExact(delta));
} else {
return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, "delta" + iteration, delta);
return new OfflinePointWriter(config, tempDir, tempFileNamePrefix, "delta" + iteration, delta);
}
}
@ -505,13 +492,13 @@ public final class BKDRadixSelector {
}
PointWriter getPointWriter(long count, String desc) throws IOException {
//As we recurse, we hold two on-heap point writers at any point. Therefore the
//max size for these objects is half of the total points we can have on-heap.
// As we recurse, we hold two on-heap point writers at any point. Therefore the
// max size for these objects is half of the total points we can have on-heap.
if (count <= maxPointsSortInHeap / 2) {
int size = Math.toIntExact(count);
return new HeapPointWriter(size, packedBytesLength);
return new HeapPointWriter(config, size);
} else {
return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, desc, count);
return new OfflinePointWriter(config, tempDir, tempFileNamePrefix, desc, count);
}
}
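The BKDRadixSelector above now takes a single BKDConfig instead of separate numDataDims/numIndexDims/bytesPerDim arguments, and derives packedBytesLength, packedIndexBytesLength and bytesPerDoc from it. A minimal usage sketch follows; the concrete dimensions (2 data dims, 1 index dim, 4 bytes per dim, 512 points per leaf), the heap budget and the temp-file prefix are illustrative assumptions, not values taken from this commit.

import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDRadixSelector;
import org.apache.lucene.util.bkd.HeapPointWriter;

public class BKDRadixSelectorSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical configuration: 2 data dimensions, 1 index dimension,
    // 4 bytes per dimension, at most 512 points per leaf.
    BKDConfig config = new BKDConfig(2, 1, 4, 512);
    try (Directory tempDir = new ByteBuffersDirectory()) {
      // The selector derives every packed length it needs from the config.
      BKDRadixSelector selector = new BKDRadixSelector(config, 1000, tempDir, "tmp");

      // Fill a small on-heap block; packedBytesLength = 2 * 4 = 8 bytes per value.
      HeapPointWriter points = new HeapPointWriter(config, 3);
      points.append(new byte[] {0, 0, 0, 3, 0, 0, 0, 0}, 0);
      points.append(new byte[] {0, 0, 0, 1, 0, 0, 0, 0}, 1);
      points.append(new byte[] {0, 0, 0, 2, 0, 0, 0, 0}, 2);

      // Sort the whole block on dimension 0 with no common prefix stripped.
      selector.heapRadixSort(points, 0, 3, 0, 0);
    }
  }
}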


@ -36,19 +36,14 @@ public final class BKDReader extends PointValues {
// Packed array of byte[] holding all split values in the full binary tree:
final int leafNodeOffset;
final int numDataDims;
final int numIndexDims;
final int bytesPerDim;
final BKDConfig config;
final int numLeaves;
final IndexInput in;
final int maxPointsInLeafNode;
final byte[] minPackedValue;
final byte[] maxPackedValue;
final long pointCount;
final int docCount;
final int version;
protected final int packedBytesLength;
protected final int packedIndexBytesLength;
final long minLeafBlockFP;
final IndexInput packedIndex;
@ -57,30 +52,30 @@ public final class BKDReader extends PointValues {
* BKD tree is always stored off-heap. */
public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throws IOException {
version = CodecUtil.checkHeader(metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
numDataDims = metaIn.readVInt();
final int numDims = metaIn.readVInt();
final int numIndexDims;
if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
numIndexDims = metaIn.readVInt();
} else {
numIndexDims = numDataDims;
numIndexDims = numDims;
}
maxPointsInLeafNode = metaIn.readVInt();
bytesPerDim = metaIn.readVInt();
packedBytesLength = numDataDims * bytesPerDim;
packedIndexBytesLength = numIndexDims * bytesPerDim;
final int maxPointsInLeafNode = metaIn.readVInt();
final int bytesPerDim = metaIn.readVInt();
config = new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
// Read index:
numLeaves = metaIn.readVInt();
assert numLeaves > 0;
leafNodeOffset = numLeaves;
minPackedValue = new byte[packedIndexBytesLength];
maxPackedValue = new byte[packedIndexBytesLength];
minPackedValue = new byte[config.packedIndexBytesLength];
maxPackedValue = new byte[config.packedIndexBytesLength];
metaIn.readBytes(minPackedValue, 0, packedIndexBytesLength);
metaIn.readBytes(maxPackedValue, 0, packedIndexBytesLength);
metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength);
metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength);
for(int dim=0;dim<numIndexDims;dim++) {
if (Arrays.compareUnsigned(minPackedValue, dim * bytesPerDim, dim * bytesPerDim + bytesPerDim, maxPackedValue, dim * bytesPerDim, dim * bytesPerDim + bytesPerDim) > 0) {
for(int dim=0;dim<config.numIndexDims;dim++) {
if (Arrays.compareUnsigned(minPackedValue, dim * config.bytesPerDim, dim * config.bytesPerDim + config.bytesPerDim, maxPackedValue, dim * config.bytesPerDim, dim * config.bytesPerDim + config.bytesPerDim) > 0) {
throw new CorruptIndexException("minPackedValue " + new BytesRef(minPackedValue) + " is > maxPackedValue " + new BytesRef(maxPackedValue) + " for dim=" + dim, metaIn);
}
}
@ -144,16 +139,16 @@ public final class BKDReader extends PointValues {
splitPackedValueStack = new byte[treeDepth+1][];
this.nodeID = nodeID;
this.level = level;
splitPackedValueStack[level] = new byte[packedIndexBytesLength];
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
leafBlockFPStack = new long[treeDepth+1];
rightNodePositions = new int[treeDepth+1];
splitValuesStack = new byte[treeDepth+1][];
splitDims = new int[treeDepth+1];
negativeDeltas = new boolean[numIndexDims*(treeDepth+1)];
negativeDeltas = new boolean[config.numIndexDims*(treeDepth+1)];
this.in = in;
splitValuesStack[0] = new byte[packedIndexBytesLength];
splitValuesStack[0] = new byte[config.packedIndexBytesLength];
scratch = new BytesRef();
scratch.length = bytesPerDim;
scratch.length = config.bytesPerDim;
}
public void pushLeft() {
@ -171,7 +166,7 @@ public final class BKDReader extends PointValues {
index.leafBlockFPStack[level] = leafBlockFPStack[level];
index.rightNodePositions[level] = rightNodePositions[level];
index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
System.arraycopy(negativeDeltas, level*numIndexDims, index.negativeDeltas, level*numIndexDims, numIndexDims);
System.arraycopy(negativeDeltas, level*config.numIndexDims, index.negativeDeltas, level*config.numIndexDims, config.numIndexDims);
index.splitDims[level] = splitDims[level];
return index;
}
@ -224,7 +219,7 @@ public final class BKDReader extends PointValues {
public BytesRef getSplitDimValue() {
assert isLeafNode() == false;
scratch.bytes = splitValuesStack[level];
scratch.offset = splitDim * bytesPerDim;
scratch.offset = splitDim * config.bytesPerDim;
return scratch;
}
@ -271,11 +266,11 @@ public final class BKDReader extends PointValues {
private void readNodeData(boolean isLeft) {
if (splitPackedValueStack[level] == null) {
splitPackedValueStack[level] = new byte[packedIndexBytesLength];
splitPackedValueStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims);
System.arraycopy(negativeDeltas, (level-1)*config.numIndexDims, negativeDeltas, level*config.numIndexDims, config.numIndexDims);
assert splitDim != -1;
negativeDeltas[level*numIndexDims+splitDim] = isLeft;
negativeDeltas[level*config.numIndexDims+splitDim] = isLeft;
try {
leafBlockFPStack[level] = leafBlockFPStack[level - 1];
@ -291,24 +286,24 @@ public final class BKDReader extends PointValues {
// read split dim, prefix, firstDiffByteDelta encoded as int:
int code = in.readVInt();
splitDim = code % numIndexDims;
splitDim = code % config.numIndexDims;
splitDims[level] = splitDim;
code /= numIndexDims;
int prefix = code % (1 + bytesPerDim);
int suffix = bytesPerDim - prefix;
code /= config.numIndexDims;
int prefix = code % (1 + config.bytesPerDim);
int suffix = config.bytesPerDim - prefix;
if (splitValuesStack[level] == null) {
splitValuesStack[level] = new byte[packedIndexBytesLength];
splitValuesStack[level] = new byte[config.packedIndexBytesLength];
}
System.arraycopy(splitValuesStack[level - 1], 0, splitValuesStack[level], 0, packedIndexBytesLength);
System.arraycopy(splitValuesStack[level - 1], 0, splitValuesStack[level], 0, config.packedIndexBytesLength);
if (suffix > 0) {
int firstDiffByteDelta = code / (1 + bytesPerDim);
if (negativeDeltas[level * numIndexDims + splitDim]) {
int firstDiffByteDelta = code / (1 + config.bytesPerDim);
if (negativeDeltas[level * config.numIndexDims + splitDim]) {
firstDiffByteDelta = -firstDiffByteDelta;
}
int oldByte = splitValuesStack[level][splitDim * bytesPerDim + prefix] & 0xFF;
splitValuesStack[level][splitDim * bytesPerDim + prefix] = (byte) (oldByte + firstDiffByteDelta);
in.readBytes(splitValuesStack[level], splitDim * bytesPerDim + prefix + 1, suffix - 1);
int oldByte = splitValuesStack[level][splitDim * config.bytesPerDim + prefix] & 0xFF;
splitValuesStack[level][splitDim * config.bytesPerDim + prefix] = (byte) (oldByte + firstDiffByteDelta);
in.readBytes(splitValuesStack[level], splitDim * config.bytesPerDim + prefix + 1, suffix - 1);
} else {
// our split value is == last split value in this dim, which can happen when there are many duplicate values
}
@ -347,19 +342,17 @@ public final class BKDReader extends PointValues {
final IntersectVisitor visitor;
public final IndexTree index;
public IntersectState(IndexInput in, int numDims,
int packedBytesLength,
int packedIndexBytesLength,
int maxPointsInLeafNode,
public IntersectState(IndexInput in,
BKDConfig config,
IntersectVisitor visitor,
IndexTree indexVisitor) {
this.in = in;
this.visitor = visitor;
this.commonPrefixLengths = new int[numDims];
this.scratchIterator = new BKDReaderDocIDSetIterator(maxPointsInLeafNode);
this.scratchDataPackedValue = new byte[packedBytesLength];
this.scratchMinIndexPackedValue = new byte[packedIndexBytesLength];
this.scratchMaxIndexPackedValue = new byte[packedIndexBytesLength];
this.commonPrefixLengths = new int[config.numDims];
this.scratchIterator = new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode);
this.scratchDataPackedValue = new byte[config.packedBytesLength];
this.scratchMinIndexPackedValue = new byte[config.packedIndexBytesLength];
this.scratchMaxIndexPackedValue = new byte[config.packedIndexBytesLength];
this.index = indexVisitor;
}
}
@ -379,7 +372,7 @@ public final class BKDReader extends PointValues {
//System.out.println("R: addAll nodeID=" + nodeID);
if (grown == false) {
final long maxPointCount = (long) maxPointsInLeafNode * state.index.getNumLeaves();
final long maxPointCount = (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
if (maxPointCount <= Integer.MAX_VALUE) { // could be >MAX_VALUE if there are more than 2B points in total
state.visitor.grow((int) maxPointCount);
grown = true;
@ -407,12 +400,7 @@ public final class BKDReader extends PointValues {
/** Create a new {@link IntersectState} */
public IntersectState getIntersectState(IntersectVisitor visitor) {
IndexTree index = new IndexTree();
return new IntersectState(in.clone(), numDataDims,
packedBytesLength,
packedIndexBytesLength,
maxPointsInLeafNode,
visitor,
index);
return new IntersectState(in.clone(), config, visitor, index);
}
/** Visits all docIDs and packed values in a single leaf block */
@ -460,12 +448,12 @@ public final class BKDReader extends PointValues {
IndexInput in, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor) throws IOException {
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
if (numIndexDims != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
if (config.numIndexDims != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, packedIndexBytesLength);
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us range of values for each dimension, but the actual range of values
@ -510,12 +498,12 @@ public final class BKDReader extends PointValues {
visitor.grow(count);
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
} else {
if (numIndexDims != 1) {
if (config.numIndexDims != 1) {
byte[] minPackedValue = scratchMinIndexPackedValue;
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, packedIndexBytesLength);
System.arraycopy(scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
byte[] maxPackedValue = scratchMaxIndexPackedValue;
// Copy common prefixes before reading adjusted box
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, packedIndexBytesLength);
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
// The index gives us range of values for each dimension, but the actual range of values
@ -550,10 +538,10 @@ public final class BKDReader extends PointValues {
}
private void readMinMax(int[] commonPrefixLengths, byte[] minPackedValue, byte[] maxPackedValue, IndexInput in) throws IOException {
for (int dim = 0; dim < numIndexDims; dim++) {
for (int dim = 0; dim < config.numIndexDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(minPackedValue, dim * bytesPerDim + prefix, bytesPerDim - prefix);
in.readBytes(maxPackedValue, dim * bytesPerDim + prefix, bytesPerDim - prefix);
in.readBytes(minPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
in.readBytes(maxPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
}
@ -562,9 +550,9 @@ public final class BKDReader extends PointValues {
int i;
for (i = 0; i < count;) {
int length = in.readVInt();
for(int dim = 0; dim < numDataDims; dim++) {
for(int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim*bytesPerDim + prefix, bytesPerDim - prefix);
in.readBytes(scratchPackedValue, dim*config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
scratchIterator.reset(i, length);
visitor.visit(scratchIterator, scratchPackedValue);
@ -584,16 +572,16 @@ public final class BKDReader extends PointValues {
private void visitCompressedDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, BKDReaderDocIDSetIterator scratchIterator, int count, IntersectVisitor visitor, int compressedDim) throws IOException {
// the byte at `compressedByteOffset` is compressed using run-length compression,
// other suffix bytes are stored verbatim
final int compressedByteOffset = compressedDim * bytesPerDim + commonPrefixLengths[compressedDim];
final int compressedByteOffset = compressedDim * config.bytesPerDim + commonPrefixLengths[compressedDim];
commonPrefixLengths[compressedDim]++;
int i;
for (i = 0; i < count; ) {
scratchPackedValue[compressedByteOffset] = in.readByte();
final int runLen = Byte.toUnsignedInt(in.readByte());
for (int j = 0; j < runLen; ++j) {
for(int dim = 0; dim < numDataDims; dim++) {
for(int dim = 0; dim < config.numDims; dim++) {
int prefix = commonPrefixLengths[dim];
in.readBytes(scratchPackedValue, dim*bytesPerDim + prefix, bytesPerDim - prefix);
in.readBytes(scratchPackedValue, dim*config.bytesPerDim + prefix, config.bytesPerDim - prefix);
}
visitor.visit(scratchIterator.docIDs[i+j], scratchPackedValue);
}
@ -606,18 +594,18 @@ public final class BKDReader extends PointValues {
private int readCompressedDim(IndexInput in) throws IOException {
int compressedDim = in.readByte();
if (compressedDim < -2 || compressedDim >= numDataDims || (version < BKDWriter.VERSION_LOW_CARDINALITY_LEAVES && compressedDim == -2)) {
if (compressedDim < -2 || compressedDim >= config.numDims || (version < BKDWriter.VERSION_LOW_CARDINALITY_LEAVES && compressedDim == -2)) {
throw new CorruptIndexException("Got compressedDim="+compressedDim, in);
}
return compressedDim;
}
private void readCommonPrefixes(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
for(int dim=0;dim<numDataDims;dim++) {
for(int dim=0;dim<config.numDims;dim++) {
int prefix = in.readVInt();
commonPrefixLengths[dim] = prefix;
if (prefix > 0) {
in.readBytes(scratchPackedValue, dim*bytesPerDim, prefix);
in.readBytes(scratchPackedValue, dim*config.bytesPerDim, prefix);
}
//System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
}
@ -628,7 +616,7 @@ public final class BKDReader extends PointValues {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim));
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
@ -657,31 +645,31 @@ public final class BKDReader extends PointValues {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0: "splitDim=" + splitDim + ", numIndexDims=" + numIndexDims;
assert splitDim < numIndexDims: "splitDim=" + splitDim + ", numIndexDims=" + numIndexDims;
assert splitDim >= 0: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == bytesPerDim;
assert splitDimValue.length == config.bytesPerDim;
//System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert Arrays.compareUnsigned(cellMinPacked, splitDim * bytesPerDim, splitDim * bytesPerDim + bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + bytesPerDim) <= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numIndexDims=" + numIndexDims + " numDataDims=" + numDataDims;
assert Arrays.compareUnsigned(cellMaxPacked, splitDim * bytesPerDim, splitDim * bytesPerDim + bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + bytesPerDim) >= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numIndexDims=" + numIndexDims + " numDataDims=" + numDataDims;
assert Arrays.compareUnsigned(cellMinPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) <= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
assert Arrays.compareUnsigned(cellMaxPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) >= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushLeft();
intersect(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(splitPackedValue, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset, bytesPerDim);
System.arraycopy(splitPackedValue, splitDim*config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushRight();
intersect(state, splitPackedValue, cellMaxPacked);
state.index.pop();
@ -693,7 +681,7 @@ public final class BKDReader extends PointValues {
/*
System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
for(int dim=0;dim<numDims;dim++) {
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim));
System.out.println(" dim=" + dim + "\n cellMin=" + new BytesRef(cellMinPacked, dim*config.bytesPerDim, config.bytesPerDim) + "\n cellMax=" + new BytesRef(cellMaxPacked, dim*config.bytesPerDim, config.bytesPerDim));
}
*/
@ -703,39 +691,39 @@ public final class BKDReader extends PointValues {
// This cell is fully outside of the query shape: stop recursing
return 0L;
} else if (r == Relation.CELL_INSIDE_QUERY) {
return (long) maxPointsInLeafNode * state.index.getNumLeaves();
return (long) config.maxPointsInLeafNode * state.index.getNumLeaves();
} else if (state.index.isLeafNode()) {
// Assume half the points matched
return (maxPointsInLeafNode + 1) / 2;
return (config.maxPointsInLeafNode + 1) / 2;
} else {
// Non-leaf node: recurse on the split left and right nodes
int splitDim = state.index.getSplitDim();
assert splitDim >= 0: "splitDim=" + splitDim + ", numIndexDims=" + numIndexDims;
assert splitDim < numIndexDims: "splitDim=" + splitDim + ", numIndexDims=" + numIndexDims;
assert splitDim >= 0: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
assert splitDim < config.numIndexDims: "splitDim=" + splitDim + ", config.numIndexDims=" + config.numIndexDims;
byte[] splitPackedValue = state.index.getSplitPackedValue();
BytesRef splitDimValue = state.index.getSplitDimValue();
assert splitDimValue.length == bytesPerDim;
assert splitDimValue.length == config.bytesPerDim;
//System.out.println(" splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
// make sure cellMin <= splitValue <= cellMax:
assert Arrays.compareUnsigned(cellMinPacked, splitDim * bytesPerDim, splitDim * bytesPerDim + bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + bytesPerDim) <= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numIndexDims=" + numIndexDims + " numDataDims=" + numDataDims;
assert Arrays.compareUnsigned(cellMaxPacked, splitDim * bytesPerDim, splitDim * bytesPerDim + bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + bytesPerDim) >= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numIndexDims=" + numIndexDims + " numDataDims=" + numDataDims;
assert Arrays.compareUnsigned(cellMinPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) <= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
assert Arrays.compareUnsigned(cellMaxPacked, splitDim * config.bytesPerDim, splitDim * config.bytesPerDim + config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, splitDimValue.offset + config.bytesPerDim) >= 0: "config.bytesPerDim=" + config.bytesPerDim + " splitDim=" + splitDim + " config.numIndexDims=" + config.numIndexDims + " config.numDims=" + config.numDims;
// Recurse on left sub-tree:
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushLeft();
final long leftCost = estimatePointCount(state, cellMinPacked, splitPackedValue);
state.index.pop();
// Restore the split dim value since it may have been overwritten while recursing:
System.arraycopy(splitPackedValue, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset, bytesPerDim);
System.arraycopy(splitPackedValue, splitDim*config.bytesPerDim, splitDimValue.bytes, splitDimValue.offset, config.bytesPerDim);
// Recurse on right sub-tree:
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, config.packedIndexBytesLength);
System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*config.bytesPerDim, config.bytesPerDim);
state.index.pushRight();
final long rightCost = estimatePointCount(state, splitPackedValue, cellMaxPacked);
state.index.pop();
@ -755,17 +743,17 @@ public final class BKDReader extends PointValues {
@Override
public int getNumDimensions() {
return numDataDims;
return config.numDims;
}
@Override
public int getNumIndexDimensions() {
return numIndexDims;
return config.numIndexDims;
}
@Override
public int getBytesPerDimension() {
return bytesPerDim;
return config.bytesPerDim;
}
@Override
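BKDReader now keeps a single BKDConfig in place of the old numDataDims/numIndexDims/bytesPerDim/maxPointsInLeafNode and packed-length fields, and the public PointValues getters simply delegate to it. As a reference for the arithmetic the config centralizes, here is a small hypothetical helper (not an API added by this commit) that recomputes the same lengths from any PointValues instance:

import java.io.IOException;

import org.apache.lucene.index.PointValues;

final class BKDLayoutSketch {
  // Recompute the sizes BKDConfig carries: packedBytesLength, packedIndexBytesLength, bytesPerDoc.
  static void printLayout(PointValues values) throws IOException {
    int numDims = values.getNumDimensions();
    int numIndexDims = values.getNumIndexDimensions();
    int bytesPerDim = values.getBytesPerDimension();

    int packedBytesLength = numDims * bytesPerDim;            // all data dimensions
    int packedIndexBytesLength = numIndexDims * bytesPerDim;  // index dimensions only
    int bytesPerDoc = packedBytesLength + Integer.BYTES;      // packed value plus the docID

    System.out.println("packedBytesLength=" + packedBytesLength
        + " packedIndexBytesLength=" + packedIndexBytesLength
        + " bytesPerDoc=" + bytesPerDoc);
  }
}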


@ -26,19 +26,17 @@ import org.apache.lucene.util.BytesRef;
public final class HeapPointReader implements PointReader {
private int curRead;
final byte[] block;
final int packedBytesLength;
final int packedBytesDocIDLength;
final BKDConfig config;
final int end;
private final HeapPointValue pointValue;
public HeapPointReader(byte[] block, int packedBytesLength, int start, int end) {
public HeapPointReader(BKDConfig config, byte[] block, int start, int end) {
this.block = block;
curRead = start-1;
this.end = end;
this.packedBytesLength = packedBytesLength;
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
this.config = config;
if (start < end) {
this.pointValue = new HeapPointValue(block, packedBytesLength);
this.pointValue = new HeapPointValue(config, block);
} else {
//no values
this.pointValue = null;
@ -53,7 +51,7 @@ public final class HeapPointReader implements PointReader {
@Override
public PointValue pointValue() {
pointValue.setOffset(curRead * packedBytesDocIDLength);
pointValue.setOffset(curRead * config.bytesPerDoc);
return pointValue;
}
@ -70,10 +68,10 @@ public final class HeapPointReader implements PointReader {
final BytesRef packedValueDocID;
final int packedValueLength;
HeapPointValue(byte[] value, int packedValueLength) {
this.packedValueLength = packedValueLength;
HeapPointValue(BKDConfig config, byte[] value) {
this.packedValueLength = config.packedBytesLength;
this.packedValue = new BytesRef(value, 0, packedValueLength);
this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
this.packedValueDocID = new BytesRef(value, 0, config.bytesPerDoc);
}
/**


@ -28,8 +28,7 @@ import org.apache.lucene.util.BytesRef;
public final class HeapPointWriter implements PointWriter {
public final byte[] block;
final int size;
final int packedBytesLength;
final int packedBytesDocIDLength;
final BKDConfig config;
private final byte[] scratch;
private int nextWrite;
private boolean closed;
@ -37,16 +36,15 @@ public final class HeapPointWriter implements PointWriter {
private HeapPointReader.HeapPointValue pointValue;
public HeapPointWriter(int size, int packedBytesLength) {
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
this.packedBytesLength = packedBytesLength;
this.block = new byte[packedBytesDocIDLength * size];
public HeapPointWriter(BKDConfig config, int size) {
this.config = config;
this.block = new byte[config.bytesPerDoc * size];
this.size = size;
this.scratch = new byte[packedBytesDocIDLength];
this.scratch = new byte[config.bytesPerDoc];
if (size > 0) {
pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength);
pointValue = new HeapPointReader.HeapPointValue(config, block);
} else {
//no values
// no values
pointValue = null;
}
}
@ -54,17 +52,17 @@ public final class HeapPointWriter implements PointWriter {
/** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */
public PointValue getPackedValueSlice(int index) {
assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index;
pointValue.setOffset(index * packedBytesDocIDLength);
pointValue.setOffset(index * config.bytesPerDoc);
return pointValue;
}
@Override
public void append(byte[] packedValue, int docID) {
assert closed == false : "point writer is already closed";
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
assert packedValue.length == config.packedBytesLength : "[packedValue] must have length [" + config.packedBytesLength + "] but was [" + packedValue.length + "]";
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
System.arraycopy(packedValue, 0, block, nextWrite * packedBytesDocIDLength, packedBytesLength);
int position = nextWrite * packedBytesDocIDLength + packedBytesLength;
System.arraycopy(packedValue, 0, block, nextWrite * config.bytesPerDoc, config.packedBytesLength);
int position = nextWrite * config.bytesPerDoc + config.packedBytesLength;
block[position] = (byte) (docID >> 24);
block[++position] = (byte) (docID >> 16);
block[++position] = (byte) (docID >> 8);
@ -77,33 +75,32 @@ public final class HeapPointWriter implements PointWriter {
assert closed == false : "point writer is already closed";
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
assert packedValueDocID.length == packedBytesDocIDLength : "[packedValue] must have length [" + (packedBytesDocIDLength) + "] but was [" + packedValueDocID.length + "]";
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset, block, nextWrite * packedBytesDocIDLength, packedBytesDocIDLength);
assert packedValueDocID.length == config.bytesPerDoc : "[packedValue] must have length [" + (config.bytesPerDoc) + "] but was [" + packedValueDocID.length + "]";
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset, block, nextWrite * config.bytesPerDoc, config.bytesPerDoc);
nextWrite++;
}
public void swap(int i, int j) {
int indexI = i * packedBytesDocIDLength;
int indexJ = j * packedBytesDocIDLength;
int indexI = i * config.bytesPerDoc;
int indexJ = j * config.bytesPerDoc;
// scratch1 = values[i]
System.arraycopy(block, indexI, scratch, 0, packedBytesDocIDLength);
System.arraycopy(block, indexI, scratch, 0, config.bytesPerDoc);
// values[i] = values[j]
System.arraycopy(block, indexJ, block, indexI, packedBytesDocIDLength);
System.arraycopy(block, indexJ, block, indexI, config.bytesPerDoc);
// values[j] = scratch1
System.arraycopy(scratch, 0, block, indexJ, packedBytesDocIDLength);
System.arraycopy(scratch, 0, block, indexJ, config.bytesPerDoc);
}
public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) {
assert packedBytesLength == numDataDims * bytesPerDim;
public int computeCardinality(int from, int to, int[] commonPrefixLengths) {
int leafCardinality = 1;
for (int i = from + 1; i < to; i++) {
for (int dim = 0; dim < numDataDims; dim++) {
final int start = dim * bytesPerDim + commonPrefixLengths[dim];
final int end = dim * bytesPerDim + bytesPerDim;
if (Arrays.mismatch(block, i * packedBytesDocIDLength + start, i * packedBytesDocIDLength + end,
block, (i - 1) * packedBytesDocIDLength + start, (i - 1) * packedBytesDocIDLength + end) != -1) {
for (int dim = 0; dim < config.numDims; dim++) {
final int start = dim * config.bytesPerDim + commonPrefixLengths[dim];
final int end = dim * config.bytesPerDim + config.bytesPerDim;
if (Arrays.mismatch(block, i * config.bytesPerDoc + start, i * config.bytesPerDoc + end,
block, (i - 1) * config.bytesPerDoc + start, (i - 1) * config.bytesPerDoc + end) != -1) {
leafCardinality++;
break;
}
@ -122,7 +119,7 @@ public final class HeapPointWriter implements PointWriter {
assert closed : "point writer is still open and trying to get a reader";
assert start + length <= size: "start=" + start + " length=" + length + " docIDs.length=" + size;
assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite;
return new HeapPointReader(block, packedBytesLength, (int) start, Math.toIntExact(start+length));
return new HeapPointReader(config, block, (int) start, Math.toIntExact(start+length));
}
@Override
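HeapPointWriter likewise sizes its block and scratch buffers from the config (bytesPerDoc per entry) rather than from a packedBytesLength argument, and computeCardinality no longer needs the dimension counts passed in. A hedged usage sketch, with an invented 1-dimension/4-byte config and invented values:

import java.io.IOException;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.HeapPointWriter;
import org.apache.lucene.util.bkd.PointReader;

final class HeapPointWriterSketch {
  static void demo() throws IOException {
    BKDConfig config = new BKDConfig(1, 1, 4, 512);  // hypothetical 1D config
    HeapPointWriter writer = new HeapPointWriter(config, 2);
    writer.append(new byte[] {0, 0, 0, 7}, 42);
    writer.append(new byte[] {0, 0, 0, 9}, 43);
    writer.close();  // a reader may only be requested once the writer is closed

    // Cardinality over the whole block; no common prefix stripped in this sketch.
    int cardinality = writer.computeCardinality(0, 2, new int[config.numDims]);
    System.out.println("leaf cardinality=" + cardinality);

    try (PointReader reader = writer.getReader(0, 2)) {
      while (reader.next()) {
        BytesRef packed = reader.pointValue().packedValue();
        // packed.length == config.packedBytesLength (4 bytes here)
        System.out.println(packed);
      }
    }
  }
}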


@ -35,10 +35,10 @@ public final class MutablePointsReaderUtils {
MutablePointsReaderUtils() {}
/** Sort the given {@link MutablePointValues} based on its packed value then doc ID. */
public static void sort(int maxDoc, int packedBytesLength,
public static void sort(BKDConfig config, int maxDoc,
MutablePointValues reader, int from, int to) {
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) {
new MSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
@Override
protected void swap(int i, int j) {
@ -47,10 +47,10 @@ public final class MutablePointsReaderUtils {
@Override
protected int byteAt(int i, int k) {
if (k < packedBytesLength) {
if (k < config.packedBytesLength) {
return Byte.toUnsignedInt(reader.getByteAt(i, k));
} else {
final int shift = bitsPerDocId - ((k - packedBytesLength + 1) << 3);
final int shift = bitsPerDocId - ((k - config.packedBytesLength + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
@ -76,10 +76,10 @@ public final class MutablePointsReaderUtils {
@Override
protected int comparePivot(int j) {
if (k < packedBytesLength) {
if (k < config.packedBytesLength) {
reader.getValue(j, scratch);
int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k,
scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + config.packedBytesLength - k,
scratch.bytes, scratch.offset + k, scratch.offset + k + config.packedBytesLength - k);
if (cmp != 0) {
return cmp;
}
@ -93,14 +93,12 @@ public final class MutablePointsReaderUtils {
}
/** Sort points on the given dimension. */
public static void sortByDim(int numDataDim, int numIndexDim, int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
public static void sortByDim(BKDConfig config, int sortedDim, int[] commonPrefixLengths,
MutablePointValues reader, int from, int to,
BytesRef scratch1, BytesRef scratch2) {
final int start = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
final int dimEnd = sortedDim * bytesPerDim + bytesPerDim;
final int dataStart = numIndexDim * bytesPerDim;
final int dataEnd = dataStart + (numDataDim - numIndexDim) * bytesPerDim;
final int start = sortedDim * config.bytesPerDim + commonPrefixLengths[sortedDim];
final int dimEnd = sortedDim * config.bytesPerDim + config.bytesPerDim;
// No need for a fancy radix sort here, this is called on the leaves only so
// there are not many values to sort
new IntroSorter() {
@ -125,8 +123,8 @@ public final class MutablePointsReaderUtils {
int cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + start, pivot.offset + dimEnd, scratch2.bytes,
scratch2.offset + start, scratch2.offset + dimEnd);
if (cmp == 0) {
cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
cmp = Arrays.compareUnsigned(pivot.bytes, pivot.offset + config.packedIndexBytesLength, pivot.offset + config.packedBytesLength,
scratch2.bytes, scratch2.offset + config.packedIndexBytesLength, scratch2.offset + config.packedBytesLength);
if (cmp == 0) {
cmp = pivotDoc - reader.getDocID(j);
}
@ -139,20 +137,19 @@ public final class MutablePointsReaderUtils {
/** Partition points around {@code mid}. All values on the left must be less
* than or equal to it and all values on the right must be greater than or
* equal to it. */
public static void partition(int numDataDim, int numIndexDim, int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
public static void partition(BKDConfig config, int maxDoc, int splitDim, int commonPrefixLen,
MutablePointValues reader, int from, int to, int mid,
BytesRef scratch1, BytesRef scratch2) {
final int dimOffset = splitDim * bytesPerDim + commonPrefixLen;
final int dimCmpBytes = bytesPerDim - commonPrefixLen;
final int dataOffset = numIndexDim * bytesPerDim;
final int dataCmpBytes = (numDataDim - numIndexDim) * bytesPerDim + dimCmpBytes;
final int dimOffset = splitDim * config.bytesPerDim + commonPrefixLen;
final int dimCmpBytes = config.bytesPerDim - commonPrefixLen;
final int dataCmpBytes = (config.numDims - config.numIndexDims) * config.bytesPerDim + dimCmpBytes;
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) {
@Override
protected Selector getFallbackSelector(int k) {
final int dataStart = (k < dimCmpBytes) ? dataOffset : dataOffset + k - dimCmpBytes;
final int dataEnd = numDataDim * bytesPerDim;
final int dataStart = (k < dimCmpBytes) ? config.packedIndexBytesLength : config.packedIndexBytesLength + k - dimCmpBytes;
final int dataEnd = config.numDims * config.bytesPerDim;
return new IntroSelector() {
final BytesRef pivot = scratch1;
@ -202,7 +199,7 @@ public final class MutablePointsReaderUtils {
if (k < dimCmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k));
} else if (k < dataCmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, dataOffset + k - dimCmpBytes));
return Byte.toUnsignedInt(reader.getByteAt(i, config.packedIndexBytesLength + k - dimCmpBytes));
} else {
final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
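MutablePointsReaderUtils now receives the BKDConfig directly in sort, sortByDim and partition, instead of separate numDataDim/numIndexDim/bytesPerDim arguments. The wrappers below only illustrate the new call shapes; the MutablePointValues instance, maxDoc and the ranges are supplied by the caller and are placeholders here:

import org.apache.lucene.codecs.MutablePointValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.MutablePointsReaderUtils;

final class MutableSortSketch {
  // Full sort by packed value, then by docID.
  static void sortForFlush(BKDConfig config, int maxDoc, MutablePointValues reader, int from, int to) {
    MutablePointsReaderUtils.sort(config, maxDoc, reader, from, to);
  }

  // Leaf-level sort on a single dimension; the scratch refs are reused across comparisons.
  static void sortLeafByDim(BKDConfig config, int sortedDim, int[] commonPrefixLengths,
                            MutablePointValues reader, int from, int to) {
    MutablePointsReaderUtils.sortByDim(config, sortedDim, commonPrefixLengths,
        reader, from, to, new BytesRef(), new BytesRef());
  }

  // Partition around mid: values on the left are <= the split value, values on the right are >= it.
  static void splitAround(BKDConfig config, int maxDoc, int splitDim, int commonPrefixLen,
                          MutablePointValues reader, int from, int to, int mid) {
    MutablePointsReaderUtils.partition(config, maxDoc, splitDim, commonPrefixLen,
        reader, from, to, mid, new BytesRef(), new BytesRef());
  }
}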


@ -37,32 +37,30 @@ public final class OfflinePointReader implements PointReader {
final IndexInput in;
byte[] onHeapBuffer;
int offset;
final int bytesPerDoc;
private boolean checked;
private final int packedValueLength;
private final BKDConfig config;
private int pointsInBuffer;
private final int maxPointOnHeap;
// File name we are reading
final String name;
private final OfflinePointValue pointValue;
public OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length, byte[] reusableBuffer) throws IOException {
this.bytesPerDoc = packedBytesLength + Integer.BYTES;
this.packedValueLength = packedBytesLength;
public OfflinePointReader(BKDConfig config, Directory tempDir, String tempFileName, long start, long length, byte[] reusableBuffer) throws IOException {
this.config = config;
if ((start + length) * bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + bytesPerDoc + " fileLength=" + tempDir.fileLength(tempFileName) + " tempFileName=" + tempFileName);
if ((start + length) * config.bytesPerDoc + CodecUtil.footerLength() > tempDir.fileLength(tempFileName)) {
throw new IllegalArgumentException("requested slice is beyond the length of this file: start=" + start + " length=" + length + " bytesPerDoc=" + config.bytesPerDoc + " fileLength=" + tempDir.fileLength(tempFileName) + " tempFileName=" + tempFileName);
}
if (reusableBuffer == null) {
throw new IllegalArgumentException("[reusableBuffer] cannot be null");
}
if (reusableBuffer.length < bytesPerDoc) {
throw new IllegalArgumentException("Length of [reusableBuffer] must be bigger than " + bytesPerDoc);
if (reusableBuffer.length < config.bytesPerDoc) {
throw new IllegalArgumentException("Length of [reusableBuffer] must be bigger than " + config.bytesPerDoc);
}
this.maxPointOnHeap = reusableBuffer.length / bytesPerDoc;
this.maxPointOnHeap = reusableBuffer.length / config.bytesPerDoc;
// Best-effort checksumming:
if (start == 0 && length*bytesPerDoc == tempDir.fileLength(tempFileName) - CodecUtil.footerLength()) {
if (start == 0 && length*config.bytesPerDoc == tempDir.fileLength(tempFileName) - CodecUtil.footerLength()) {
// If we are going to read the entire file, e.g. because BKDWriter is now
// partitioning it, we open with checksums:
in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE);
@ -76,11 +74,11 @@ public final class OfflinePointReader implements PointReader {
name = tempFileName;
long seekFP = start * bytesPerDoc;
long seekFP = start * config.bytesPerDoc;
in.seek(seekFP);
countLeft = length;
this.onHeapBuffer = reusableBuffer;
this.pointValue = new OfflinePointValue(onHeapBuffer, packedValueLength);
this.pointValue = new OfflinePointValue(config, onHeapBuffer);
}
@Override
@ -93,11 +91,11 @@ public final class OfflinePointReader implements PointReader {
}
try {
if (countLeft > maxPointOnHeap) {
in.readBytes(onHeapBuffer, 0, maxPointOnHeap * bytesPerDoc);
in.readBytes(onHeapBuffer, 0, maxPointOnHeap * config.bytesPerDoc);
pointsInBuffer = maxPointOnHeap - 1;
countLeft -= maxPointOnHeap;
} else {
in.readBytes(onHeapBuffer, 0, (int) countLeft * bytesPerDoc);
in.readBytes(onHeapBuffer, 0, (int) countLeft * config.bytesPerDoc);
pointsInBuffer = Math.toIntExact(countLeft - 1);
countLeft = 0;
}
@ -108,7 +106,7 @@ public final class OfflinePointReader implements PointReader {
}
} else {
this.pointsInBuffer--;
this.offset += bytesPerDoc;
this.offset += config.bytesPerDoc;
}
return true;
}
@ -141,10 +139,10 @@ public final class OfflinePointReader implements PointReader {
final BytesRef packedValueDocID;
final int packedValueLength;
OfflinePointValue(byte[] value, int packedValueLength) {
this.packedValueLength = packedValueLength;
OfflinePointValue(BKDConfig config, byte[] value) {
this.packedValueLength = config.packedBytesLength;
this.packedValue = new BytesRef(value, 0, packedValueLength);
this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
this.packedValueDocID = new BytesRef(value, 0, config.bytesPerDoc);
}
/**


@ -34,25 +34,25 @@ public final class OfflinePointWriter implements PointWriter {
final Directory tempDir;
public final IndexOutput out;
public final String name;
final int packedBytesLength;
final BKDConfig config;
long count;
private boolean closed;
final long expectedCount;
/** Create a new writer with an unknown number of incoming points */
public OfflinePointWriter(Directory tempDir, String tempFileNamePrefix, int packedBytesLength,
public OfflinePointWriter(BKDConfig config, Directory tempDir, String tempFileNamePrefix,
String desc, long expectedCount) throws IOException {
this.out = tempDir.createTempOutput(tempFileNamePrefix, "bkd_" + desc, IOContext.DEFAULT);
this.name = out.getName();
this.tempDir = tempDir;
this.packedBytesLength = packedBytesLength;
this.config = config;
this.expectedCount = expectedCount;
}
@Override
public void append(byte[] packedValue, int docID) throws IOException {
assert closed == false : "Point writer is already closed";
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
assert packedValue.length == config.packedBytesLength : "[packedValue] must have length [" + config.packedBytesLength + "] but was [" + packedValue.length + "]";
out.writeBytes(packedValue, 0, packedValue.length);
out.writeInt(docID);
count++;
@ -63,7 +63,7 @@ public final class OfflinePointWriter implements PointWriter {
public void append(PointValue pointValue) throws IOException {
assert closed == false : "Point writer is already closed";
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
assert packedValueDocID.length == packedBytesLength + Integer.BYTES : "[packedValue and docID] must have length [" + (packedBytesLength + Integer.BYTES) + "] but was [" + packedValueDocID.length + "]";
assert packedValueDocID.length == config.bytesPerDoc : "[packedValue and docID] must have length [" + (config.bytesPerDoc) + "] but was [" + packedValueDocID.length + "]";
out.writeBytes(packedValueDocID.bytes, packedValueDocID.offset, packedValueDocID.length);
count++;
assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count;
@ -71,7 +71,7 @@ public final class OfflinePointWriter implements PointWriter {
@Override
public PointReader getReader(long start, long length) throws IOException {
byte[] buffer = new byte[packedBytesLength + Integer.BYTES];
byte[] buffer = new byte[config.bytesPerDoc];
return getReader(start, length, buffer);
}
@ -79,7 +79,7 @@ public final class OfflinePointWriter implements PointWriter {
assert closed: "point writer is still open and trying to get a reader";
assert start + length <= count: "start=" + start + " length=" + length + " count=" + count;
assert expectedCount == 0 || count == expectedCount;
return new OfflinePointReader(tempDir, name, packedBytesLength, start, length, reusableBuffer);
return new OfflinePointReader(config, tempDir, name, start, length, reusableBuffer);
}
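
The asserts above pin down the temp-file record layout: append() takes exactly config.packedBytesLength value bytes plus a docID, and every stored record is config.bytesPerDoc bytes. A small round-trip sketch under the signatures visible in this diff; the class name, ByteBuffersDirectory, and sample data are assumptions for illustration, and the bkd helper types are taken to be accessible to the caller as they are to the tests in this commit:

import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.OfflinePointWriter;
import org.apache.lucene.util.bkd.PointReader;
import org.apache.lucene.util.bkd.PointValue;

public class OfflinePointsRoundTripSketch {
  public static void main(String[] args) throws Exception {
    // One indexed dimension of 4 bytes; leaf size left at the shared default.
    BKDConfig config = new BKDConfig(1, 1, Integer.BYTES, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
    int numPoints = 3;
    try (Directory dir = new ByteBuffersDirectory()) {
      OfflinePointWriter writer = new OfflinePointWriter(config, dir, "tmp", "sketch", numPoints);
      byte[] packed = new byte[config.packedBytesLength]; // must match the config, per the assert above
      for (int docID = 0; docID < numPoints; docID++) {
        packed[config.packedBytesLength - 1] = (byte) docID;
        writer.append(packed, docID);
      }
      writer.close(); // getReader asserts that the writer is closed
      try (PointReader reader = writer.getReader(0, numPoints)) {
        while (reader.next()) {
          PointValue pointValue = reader.pointValue();
          BytesRef packedValue = pointValue.packedValue(); // config.packedBytesLength bytes
          System.out.println(pointValue.docID() + " -> " + packedValue.length + " bytes");
        }
      }
      writer.destroy(); // removes the temporary file
    }
  }
}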
@Override

View File

@ -40,7 +40,7 @@ import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
import org.apache.lucene.util.bkd.BKDConfig;
public class TestLucene86PointsFormat extends BasePointsFormatTestCase {
@ -78,7 +78,7 @@ public class TestLucene86PointsFormat extends BasePointsFormatTestCase {
} else {
// standard issue
codec = defaultCodec;
maxPointsInLeafNode = BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
maxPointsInLeafNode = BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE;
}
}

View File

@ -27,7 +27,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.bkd.BKDWriter;
import org.apache.lucene.util.bkd.BKDConfig;
public class TestLatLonPointQueries extends BaseGeoPointTestCase {
@ -62,7 +62,7 @@ public class TestLatLonPointQueries extends BaseGeoPointTestCase {
}
public void testDistanceQueryWithInvertedIntersection() throws IOException {
final int numMatchingDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
final int numMatchingDocs = atLeast(10 * BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
try (Directory dir = newDirectory()) {

View File

@ -68,7 +68,7 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDWriter;
import org.apache.lucene.util.bkd.BKDConfig;
import org.junit.BeforeClass;
@LuceneTestCase.SuppressCodecs("SimpleText")
@ -2180,7 +2180,7 @@ public class TestPointQueries extends LuceneTestCase {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
final int numDims = TestUtil.nextInt(random(), 1, 3);
final int numDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE); // we need multiple leaves to enable this optimization
final int numDocs = atLeast(10 * BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE); // we need multiple leaves to enable this optimization
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
int[] values = new int[numDims];
@ -2201,9 +2201,9 @@ public class TestPointQueries extends LuceneTestCase {
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(high, numDocs - 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(low, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE + 1);
Arrays.fill(low, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE + 1);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
Arrays.fill(high, numDocs - BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
Arrays.fill(high, numDocs - BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
r.close();

View File

@ -40,8 +40,8 @@ public class Test2BBKDPoints extends LuceneTestCase {
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 1, 1, Long.BYTES,
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, 26L * numDocs);
BKDWriter w = new BKDWriter(numDocs, dir, "_0", new BKDConfig(1, 1, Long.BYTES, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, 26L * numDocs);
int counter = 0;
byte[] packedBytes = new byte[Long.BYTES];
for (int docID = 0; docID < numDocs; docID++) {
@ -79,8 +79,8 @@ public class Test2BBKDPoints extends LuceneTestCase {
final int numDocs = (Integer.MAX_VALUE / 26) + 100;
BKDWriter w = new BKDWriter(numDocs, dir, "_0", 2, 2, Long.BYTES,
BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE, BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, 26L * numDocs);
BKDWriter w = new BKDWriter(numDocs, dir, "_0", new BKDConfig(2, 2, Long.BYTES, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, 26L * numDocs);
int counter = 0;
byte[] packedBytes = new byte[2*Long.BYTES];
for (int docID = 0; docID < numDocs; docID++) {

View File

@ -50,7 +50,7 @@ public class TestBKD extends LuceneTestCase {
public void testBasicInts1D() throws Exception {
try (Directory dir = getDirectory(100)) {
BKDWriter w = new BKDWriter(100, dir, "tmp", 1, 1, 4, 2, 1.0f, 100);
BKDWriter w = new BKDWriter(100, dir, "tmp", new BKDConfig(1, 1, 4, 2), 1.0f, 100);
byte[] scratch = new byte[4];
for(int docID=0;docID<100;docID++) {
NumericUtils.intToSortableBytes(docID, scratch, 0);
@ -128,7 +128,7 @@ public class TestBKD extends LuceneTestCase {
int numIndexDims = TestUtil.nextInt(random(), 1, numDims);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat());
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numIndexDims, 4, maxPointsInLeafNode, maxMB, numDocs);
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", new BKDConfig(numDims, numIndexDims, 4, maxPointsInLeafNode), maxMB, numDocs);
if (VERBOSE) {
System.out.println("TEST: numDims=" + numDims + " numIndexDims=" + numIndexDims + " numDocs=" + numDocs);
@ -271,7 +271,7 @@ public class TestBKD extends LuceneTestCase {
int numDims = TestUtil.nextInt(random(), 1, 5);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 100);
float maxMB = (float) 3.0 + (3*random().nextFloat());
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", numDims, numDims, numBytesPerDim, maxPointsInLeafNode, maxMB, numDocs);
BKDWriter w = new BKDWriter(numDocs, dir, "tmp", new BKDConfig(numDims, numDims, numBytesPerDim, maxPointsInLeafNode), maxMB, numDocs);
BigInteger[][] docs = new BigInteger[numDocs][];
byte[] scratch = new byte[numBytesPerDim*numDims];
@ -449,7 +449,7 @@ public class TestBKD extends LuceneTestCase {
public void testTooLittleHeap() throws Exception {
try (Directory dir = getDirectory(0)) {
IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
new BKDWriter(1, dir, "bkd", 1, 1, 16, 1000000, 0.001, 0);
new BKDWriter(1, dir, "bkd", new BKDConfig(1, 1, 16, 1000000), 0.001, 0);
});
assertTrue(expected.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
}
@ -707,7 +707,7 @@ public class TestBKD extends LuceneTestCase {
maxDocs = random().nextLong();
}
}
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode, maxMB, maxDocs);
BKDWriter w = new BKDWriter(numValues, dir, "_" + seg, new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode), maxMB, maxDocs);
IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
IndexInput in = null;
@ -769,7 +769,7 @@ public class TestBKD extends LuceneTestCase {
seg++;
maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 1000);
maxMB = (float) 3.0 + (3*random().nextDouble());
w = new BKDWriter(numValues, dir, "_" + seg, numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length);
w = new BKDWriter(numValues, dir, "_" + seg, new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode), maxMB, docValues.length);
lastDocIDBase = docID;
}
}
@ -792,7 +792,7 @@ public class TestBKD extends LuceneTestCase {
out.close();
in = dir.openInput("bkd", IOContext.DEFAULT);
seg++;
w = new BKDWriter(numValues, dir, "_" + seg, numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode, maxMB, docValues.length);
w = new BKDWriter(numValues, dir, "_" + seg, new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode), maxMB, docValues.length);
List<BKDReader> readers = new ArrayList<>();
for(long fp : toMerge) {
in.seek(fp);
@ -1077,7 +1077,7 @@ public class TestBKD extends LuceneTestCase {
public void testTieBreakOrder() throws Exception {
try (Directory dir = newDirectory()) {
int numDocs = 10000;
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", 1, 1, Integer.BYTES, 2, 0.01f, numDocs);
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", new BKDConfig(1, 1, Integer.BYTES, 2), 0.01f, numDocs);
for(int i=0;i<numDocs;i++) {
w.add(new byte[Integer.BYTES], i);
}
@ -1133,7 +1133,7 @@ public class TestBKD extends LuceneTestCase {
System.arraycopy(pointValue1, i * numBytesPerDim, pointValue2, i * numBytesPerDim, numBytesPerDim);
}
BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode,
BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", new BKDConfig(numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode),
maxMB, 2 * numValues);
for (int i = 0; i < numValues; ++i) {
w.add(pointValue1, i);
@ -1193,7 +1193,7 @@ public class TestBKD extends LuceneTestCase {
public void test2DLongOrdsOffline() throws Exception {
try (Directory dir = newDirectory()) {
int numDocs = 100000;
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", 2, 2, Integer.BYTES, 2, 0.01f, numDocs);
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", new BKDConfig(2, 2, Integer.BYTES, 2), 0.01f, numDocs);
byte[] buffer = new byte[2*Integer.BYTES];
for(int i=0;i<numDocs;i++) {
random().nextBytes(buffer);
@ -1247,7 +1247,7 @@ public class TestBKD extends LuceneTestCase {
Directory dir = newFSDirectory(createTempDir());
int numDocs = atLeast(10000);
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", numDims, numIndexDims, bytesPerDim, 32, 1f, numDocs);
BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", new BKDConfig(numDims, numIndexDims, bytesPerDim, 32), 1f, numDocs);
byte[] tmp = new byte[bytesUsed];
byte[] buffer = new byte[numDims * bytesPerDim];
for(int i=0;i<numDocs;i++) {
@ -1306,7 +1306,7 @@ public class TestBKD extends LuceneTestCase {
final byte[] uniquePointValue = new byte[numBytesPerDim];
random().nextBytes(uniquePointValue);
BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, 1, numBytesPerDim, maxPointsInLeafNode,
BKDWriter w = new BKDWriter(numValues, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, maxPointsInLeafNode),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
for (int i = 0; i < numValues; ++i) {
if (i == numValues / 2) {
@ -1468,7 +1468,7 @@ public class TestBKD extends LuceneTestCase {
}
};
BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, 1, numBytesPerDim, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
BKDWriter w = new BKDWriter(numValues, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
expectThrows(IllegalStateException.class, () -> {
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
@ -1486,7 +1486,7 @@ public class TestBKD extends LuceneTestCase {
final int numPointsAdded = 50; // exceeds totalPointCount
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[] pointValue = new byte[numBytesPerDim];
BKDWriter w = new BKDWriter(numValues, dir, "_temp", 1, 1, numBytesPerDim, 2,
BKDWriter w = new BKDWriter(numValues, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, 2),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
for(int i=0;i<numValues;i++) {
random().nextBytes(pointValue);
@ -1505,7 +1505,7 @@ public class TestBKD extends LuceneTestCase {
final int numPointsAdded = 50; // exceeds totalPointCount
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final byte[][] pointValue = new byte[11][numBytesPerDim];
BKDWriter w = new BKDWriter(numValues + 1, dir, "_temp", 1, 1, numBytesPerDim, 2,
BKDWriter w = new BKDWriter(numValues + 1, dir, "_temp", new BKDConfig(1, 1, numBytesPerDim, 2),
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, numValues);
for(int i=0;i<numValues + 1;i++) {
random().nextBytes(pointValue[i]);
@ -1589,5 +1589,4 @@ public class TestBKD extends LuceneTestCase {
}
dir.close();
}
}

View File

@ -33,11 +33,11 @@ public class TestBKDRadixSelector extends LuceneTestCase {
int values = 4;
Directory dir = getDirectory(values);
int middle = 2;
int dimensions =1;
int dimensions = 1;
int bytesPerDimensions = Integer.BYTES;
int packedLength = dimensions * bytesPerDimensions;
PointWriter points = getRandomPointWriter(dir, values, packedLength);
byte[] value = new byte[packedLength];
BKDConfig config = new BKDConfig(dimensions, dimensions, bytesPerDimensions, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
PointWriter points = getRandomPointWriter(config, dir, values);
byte[] value = new byte[config.packedBytesLength];
NumericUtils.intToSortableBytes(1, value, 0);
points.append(value, 0);
NumericUtils.intToSortableBytes(2, value, 0);
@ -47,8 +47,8 @@ public class TestBKDRadixSelector extends LuceneTestCase {
NumericUtils.intToSortableBytes(4, value, 0);
points.append(value, 3);
points.close();
PointWriter copy = copyPoints(dir,points, packedLength);
verify(dir, copy, dimensions, dimensions, 0, values, middle, packedLength, bytesPerDimensions, 0);
PointWriter copy = copyPoints(config, dir, points);
verify(config, dir, copy, 0, values, middle, 0);
dir.close();
}
@ -66,6 +66,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
}
private void doTestRandomBinary(int count) throws IOException {
BKDConfig config = getRandomConfig();
int values = TestUtil.nextInt(random(), count, count*2);
Directory dir = getDirectory(values);
int start;
@ -79,31 +80,27 @@ public class TestBKDRadixSelector extends LuceneTestCase {
}
int partitionPoint = TestUtil.nextInt(random(), start + 1, end - 1);
int sortedOnHeap = random().nextInt(5000);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
int packedLength = dataDimensions * bytesPerDimensions;
PointWriter points = getRandomPointWriter(dir, values, packedLength);
byte[] value = new byte[packedLength];
PointWriter points = getRandomPointWriter(config, dir, values);
byte[] value = new byte[config.packedBytesLength];
for (int i =0; i < values; i++) {
random().nextBytes(value);
points.append(value, i);
}
points.close();
verify(dir, points, dataDimensions, indexDimensions, start, end, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
verify(config, dir, points, start, end, partitionPoint, sortedOnHeap);
dir.close();
}
public void testRandomAllDimensionsEquals() throws IOException {
int dimensions = TestUtil.nextInt(random(), 1, BKDConfig.MAX_INDEX_DIMS);
int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
BKDConfig config = new BKDConfig(dimensions, dimensions, bytesPerDimensions, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
int values = TestUtil.nextInt(random(), 15000, 20000);
Directory dir = getDirectory(values);
int partitionPoint = random().nextInt(values);
int sortedOnHeap = random().nextInt(5000);
int dimensions = TestUtil.nextInt(random(), 1, 8);
int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
int packedLength = dimensions * bytesPerDimensions;
PointWriter points = getRandomPointWriter(dir, values, packedLength);
byte[] value = new byte[packedLength];
PointWriter points = getRandomPointWriter(config, dir, values);
byte[] value = new byte[config.packedBytesLength];
random().nextBytes(value);
for (int i =0; i < values; i++) {
if (random().nextBoolean()) {
@ -113,7 +110,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
}
}
points.close();
verify(dir, points, dimensions, dimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
verify(config, dir, points, 0, values, partitionPoint, sortedOnHeap);
dir.close();
}
@ -122,12 +119,9 @@ public class TestBKDRadixSelector extends LuceneTestCase {
Directory dir = getDirectory(values);
int partitionPoint = random().nextInt(values);
int sortedOnHeap = random().nextInt(5000);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
int packedLength = dataDimensions * bytesPerDimensions;
PointWriter points = getRandomPointWriter(dir, values, packedLength);
byte[] value = new byte[packedLength];
BKDConfig config = getRandomConfig();
PointWriter points = getRandomPointWriter(config, dir, values);
byte[] value = new byte[config.packedBytesLength];
random().nextBytes(value);
for (int i =0; i < values; i++) {
if (random().nextBoolean()) {
@ -137,7 +131,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
}
}
points.close();
verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
verify(config, dir, points, 0, values, partitionPoint, sortedOnHeap);
dir.close();
}
@ -146,33 +140,27 @@ public class TestBKDRadixSelector extends LuceneTestCase {
Directory dir = getDirectory(values);
int partitionPoint = random().nextInt(values);
int sortedOnHeap = random().nextInt(5000);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
int packedLength = dataDimensions * bytesPerDimensions;
PointWriter points = getRandomPointWriter(dir, values, packedLength);
byte[] value = new byte[packedLength];
BKDConfig config = getRandomConfig();
PointWriter points = getRandomPointWriter(config, dir, values);
byte[] value = new byte[config.packedBytesLength];
random().nextBytes(value);
for (int i =0; i < values; i++) {
points.append(value, 0);
}
points.close();
verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
verify(config, dir, points, 0, values, partitionPoint, sortedOnHeap);
dir.close();
}
public void testRandomFewDifferentValues() throws IOException {
BKDConfig config = getRandomConfig();
int values = atLeast(15000);
Directory dir = getDirectory(values);
int partitionPoint = random().nextInt(values);
int sortedOnHeap = random().nextInt(5000);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
int packedLength = dataDimensions * bytesPerDimensions;
PointWriter points = getRandomPointWriter(dir, values, packedLength);
PointWriter points = getRandomPointWriter(config, dir, values);
int numberValues = random().nextInt(8) + 2;
byte[][] differentValues = new byte[numberValues][packedLength];
byte[][] differentValues = new byte[numberValues][config.packedBytesLength];
for (int i =0; i < numberValues; i++) {
random().nextBytes(differentValues[i]);
}
@ -180,57 +168,56 @@ public class TestBKDRadixSelector extends LuceneTestCase {
points.append(differentValues[random().nextInt(numberValues)], i);
}
points.close();
verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
verify(config, dir, points, 0, values, partitionPoint, sortedOnHeap);
dir.close();
}
public void testRandomDataDimDiffValues() throws IOException {
BKDConfig config = getRandomConfig();
int values = atLeast(15000);
Directory dir = getDirectory(values);
int partitionPoint = random().nextInt(values);
int sortedOnHeap = random().nextInt(5000);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDimensions = TestUtil.nextInt(random(), 2, 30);
int packedLength = dataDimensions * bytesPerDimensions;
PointWriter points = getRandomPointWriter(dir, values, packedLength);
byte[] value = new byte[packedLength];
byte[] dataValue = new byte[(dataDimensions - indexDimensions) * bytesPerDimensions];
PointWriter points = getRandomPointWriter(config, dir, values);
byte[] value = new byte[config.packedBytesLength];
int dataOnlyDims = config.numDims - config.numIndexDims;
byte[] dataValue = new byte[dataOnlyDims * config.bytesPerDim];
random().nextBytes(value);
for (int i =0; i < values; i++) {
random().nextBytes(dataValue);
System.arraycopy(dataValue, 0, value, indexDimensions * bytesPerDimensions, (dataDimensions - indexDimensions) * bytesPerDimensions);
System.arraycopy(dataValue, 0, value, config.numIndexDims * config.bytesPerDim, dataOnlyDims * config.bytesPerDim);
points.append(value, i);
}
points.close();
verify(dir, points, dataDimensions, indexDimensions, 0, values, partitionPoint, packedLength, bytesPerDimensions, sortedOnHeap);
verify(config, dir, points, 0, values, partitionPoint, sortedOnHeap);
dir.close();
}
private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
//we only split by indexed dimension so we check for each only those dimension
for (int splitDim = 0; splitDim < indexDimensions; splitDim++) {
//We need to make a copy of the data as it is deleted in the process
BKDRadixSelector.PathSlice inputSlice = new BKDRadixSelector.PathSlice(copyPoints(dir, points, packedLength), 0, points.count());
int commonPrefixLengthInput = getRandomCommonPrefix(inputSlice, bytesPerDimensions, splitDim);
private void verify(BKDConfig config, Directory dir, PointWriter points, long start, long end, long middle, int sortedOnHeap) throws IOException {
BKDRadixSelector radixSelector = new BKDRadixSelector(config, sortedOnHeap, dir, "test");
int dataOnlyDims = config.numDims - config.numIndexDims;
// we only split by indexed dimensions so we only check those dimensions
for (int splitDim = 0; splitDim < config.numIndexDims; splitDim++) {
// We need to make a copy of the data as it is deleted in the process
BKDRadixSelector.PathSlice inputSlice = new BKDRadixSelector.PathSlice(copyPoints(config, dir, points), 0, points.count());
int commonPrefixLengthInput = getRandomCommonPrefix(config, inputSlice, splitDim);
BKDRadixSelector.PathSlice[] slices = new BKDRadixSelector.PathSlice[2];
byte[] partitionPoint = radixSelector.select(inputSlice, slices, start, end, middle, splitDim, commonPrefixLengthInput);
assertEquals(middle - start, slices[0].count);
assertEquals(end - middle, slices[1].count);
//check that left and right slices contain the correct points
byte[] max = getMax(slices[0], bytesPerDimensions, splitDim);
byte[] min = getMin(slices[1], bytesPerDimensions, splitDim);
int cmp = Arrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions);
// check that left and right slices contain the correct points
byte[] max = getMax(config, slices[0], splitDim);
byte[] min = getMin(config, slices[1], splitDim);
int cmp = Arrays.compareUnsigned(max, 0, config.bytesPerDim, min, 0, config.bytesPerDim);
assertTrue(cmp <= 0);
if (cmp == 0) {
byte[] maxDataDim = getMaxDataDimension(slices[0], bytesPerDimensions, dataDimensions, indexDimensions, max, splitDim);
byte[] minDataDim = getMinDataDimension(slices[1], bytesPerDimensions, dataDimensions, indexDimensions, min, splitDim);
cmp = Arrays.compareUnsigned(maxDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions, minDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions);
byte[] maxDataDim = getMaxDataDimension(config, slices[0], max, splitDim);
byte[] minDataDim = getMinDataDimension(config, slices[1], min, splitDim);
cmp = Arrays.compareUnsigned(maxDataDim, 0, dataOnlyDims * config.bytesPerDim, minDataDim, 0, dataOnlyDims * config.bytesPerDim);
assertTrue(cmp <= 0);
if (cmp == 0) {
int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,maxDataDim);
int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,minDataDim);
int maxDocID = getMaxDocId(config, slices[0], splitDim, partitionPoint, maxDataDim);
int minDocId = getMinDocId(config, slices[1], splitDim, partitionPoint, minDataDim);
assertTrue(minDocId >= maxDocID);
}
}
@ -241,8 +228,8 @@ public class TestBKDRadixSelector extends LuceneTestCase {
points.destroy();
}
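
verify() above captures the selector's contract: select() splits the incoming PathSlice at a requested partition point along one indexed dimension, fills the two output slices, and returns the split value, with ties broken by the data dimensions and then by docID. A condensed sketch of that call sequence, mirroring the test; the class name, ByteBuffersDirectory, and sample data are assumptions for illustration, and member visibility is assumed to match what this same-package test relies on:

import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDRadixSelector;
import org.apache.lucene.util.bkd.HeapPointWriter;

public class RadixSelectorSketch {
  public static void main(String[] args) throws Exception {
    BKDConfig config = new BKDConfig(1, 1, Integer.BYTES, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
    int numPoints = 100;
    try (Directory dir = new ByteBuffersDirectory()) {
      HeapPointWriter points = new HeapPointWriter(config, numPoints);
      byte[] value = new byte[config.packedBytesLength];
      for (int i = 0; i < numPoints; i++) {
        NumericUtils.intToSortableBytes(i, value, 0); // ascending values in the single dimension
        points.append(value, i);
      }
      points.close();
      // 1000 points sorted on heap keeps this tiny example entirely in memory, as in the tests above.
      BKDRadixSelector radixSelector = new BKDRadixSelector(config, 1000, dir, "sketch");
      BKDRadixSelector.PathSlice[] slices = new BKDRadixSelector.PathSlice[2];
      byte[] splitValue = radixSelector.select(
          new BKDRadixSelector.PathSlice(points, 0, numPoints),
          slices, 0, numPoints, numPoints / 2, /* splitDim */ 0, /* commonPrefixLength */ 0);
      // slices[0] holds the points below the partition point, slices[1] the rest;
      // splitValue carries the value of the split dimension at that boundary.
      System.out.println(slices[0].count + " + " + slices[1].count
          + ", split at " + NumericUtils.sortableBytesToInt(splitValue, 0));
    }
  }
}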
private PointWriter copyPoints(Directory dir, PointWriter points, int packedLength) throws IOException {
try (PointWriter copy = getRandomPointWriter(dir, points.count(), packedLength);
private PointWriter copyPoints(BKDConfig config, Directory dir, PointWriter points) throws IOException {
try (PointWriter copy = getRandomPointWriter(config, dir, points.count());
PointReader reader = points.getReader(0, points.count())) {
while (reader.next()) {
copy.append(reader.pointValue());
@ -252,21 +239,21 @@ public class TestBKDRadixSelector extends LuceneTestCase {
}
/** returns a common prefix length equal or lower than the current one */
private int getRandomCommonPrefix(BKDRadixSelector.PathSlice inputSlice, int bytesPerDimension, int splitDim) throws IOException {
byte[] pointsMax = getMax(inputSlice, bytesPerDimension, splitDim);
byte[] pointsMin = getMin(inputSlice, bytesPerDimension, splitDim);
int commonPrefixLength = Arrays.mismatch(pointsMin, 0, bytesPerDimension, pointsMax, 0, bytesPerDimension);
private int getRandomCommonPrefix(BKDConfig config, BKDRadixSelector.PathSlice inputSlice, int splitDim) throws IOException {
byte[] pointsMax = getMax(config, inputSlice, splitDim);
byte[] pointsMin = getMin(config, inputSlice, splitDim);
int commonPrefixLength = Arrays.mismatch(pointsMin, 0, config.bytesPerDim, pointsMax, 0, config.bytesPerDim);
if (commonPrefixLength == -1) {
commonPrefixLength = bytesPerDimension;
commonPrefixLength = config.bytesPerDim;
}
return (random().nextBoolean()) ? commonPrefixLength : commonPrefixLength == 0 ? 0 : random().nextInt(commonPrefixLength);
}
private PointWriter getRandomPointWriter(Directory dir, long numPoints, int packedBytesLength) throws IOException {
private PointWriter getRandomPointWriter(BKDConfig config, Directory dir, long numPoints) throws IOException {
if (numPoints < 4096 && random().nextBoolean()) {
return new HeapPointWriter(Math.toIntExact(numPoints), packedBytesLength);
return new HeapPointWriter(config, Math.toIntExact(numPoints));
} else {
return new OfflinePointWriter(dir, "test", packedBytesLength, "data", numPoints);
return new OfflinePointWriter(config, dir, "test", "data", numPoints);
}
}
@ -280,34 +267,34 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return dir;
}
private byte[] getMin(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException {
byte[] min = new byte[bytesPerDimension];
private byte[] getMin(BKDConfig config, BKDRadixSelector.PathSlice pathSlice, int dimension) throws IOException {
byte[] min = new byte[config.bytesPerDim];
Arrays.fill(min, (byte) 0xff);
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
byte[] value = new byte[bytesPerDimension];
try (PointReader reader = pathSlice.writer.getReader(pathSlice.start, pathSlice.count)) {
byte[] value = new byte[config.bytesPerDim];
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
System.arraycopy(packedValue.bytes, packedValue.offset + dimension * bytesPerDimension, value, 0, bytesPerDimension);
if (Arrays.compareUnsigned(min, 0, bytesPerDimension, value, 0, bytesPerDimension) > 0) {
System.arraycopy(value, 0, min, 0, bytesPerDimension);
System.arraycopy(packedValue.bytes, packedValue.offset + dimension * config.bytesPerDim, value, 0, config.bytesPerDim);
if (Arrays.compareUnsigned(min, 0, config.bytesPerDim, value, 0, config.bytesPerDim) > 0) {
System.arraycopy(value, 0, min, 0, config.bytesPerDim);
}
}
}
return min;
}
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
private int getMinDocId(BKDConfig config, BKDRadixSelector.PathSlice p, int dimension, byte[] partitionPoint, byte[] dataDim) throws IOException {
int docID = Integer.MAX_VALUE;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
int offset = dimension * bytesPerDimension;
int dataOffset = indexDims * bytesPerDimension;
int dataLength = (dataDims - indexDims) * bytesPerDimension;
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
int offset = dimension * config.bytesPerDim;
int dataOffset = config.packedIndexBytesLength;
int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + config.bytesPerDim, partitionPoint, 0, config.bytesPerDim) == 0
&& Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
int newDocID = pointValue.docID();
if (newDocID < docID) {
@ -319,19 +306,20 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return docID;
}
private byte[] getMinDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] minDim, int splitDim) throws IOException {
byte[] min = new byte[(dataDims - indexDims) * bytesPerDimension];
private byte[] getMinDataDimension(BKDConfig config, BKDRadixSelector.PathSlice p, byte[] minDim, int splitDim) throws IOException {
final int numDataDims = config.numDims - config.numIndexDims;
byte[] min = new byte[numDataDims * config.bytesPerDim];
Arrays.fill(min, (byte) 0xff);
int offset = splitDim * bytesPerDimension;
int offset = splitDim * config.bytesPerDim;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
byte[] value = new byte[numDataDims * config.bytesPerDim];
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
if (Arrays.mismatch(minDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
if (Arrays.compareUnsigned(min, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) > 0) {
System.arraycopy(value, 0, min, 0, (dataDims - indexDims) * bytesPerDimension);
if (Arrays.mismatch(minDim, 0, config.bytesPerDim, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + config.bytesPerDim) == -1) {
System.arraycopy(packedValue.bytes, packedValue.offset + config.numIndexDims * config.bytesPerDim, value, 0, numDataDims * config.bytesPerDim);
if (Arrays.compareUnsigned(min, 0, numDataDims * config.bytesPerDim, value, 0, numDataDims * config.bytesPerDim) > 0) {
System.arraycopy(value, 0, min, 0, numDataDims * config.bytesPerDim);
}
}
}
@ -339,36 +327,37 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return min;
}
private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException {
byte[] max = new byte[bytesPerDimension];
private byte[] getMax(BKDConfig config, BKDRadixSelector.PathSlice p, int dimension) throws IOException {
byte[] max = new byte[config.bytesPerDim];
Arrays.fill(max, (byte) 0);
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
byte[] value = new byte[bytesPerDimension];
byte[] value = new byte[config.bytesPerDim];
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
System.arraycopy(packedValue.bytes, packedValue.offset + dimension * bytesPerDimension, value, 0, bytesPerDimension);
if (Arrays.compareUnsigned(max, 0, bytesPerDimension, value, 0, bytesPerDimension) < 0) {
System.arraycopy(value, 0, max, 0, bytesPerDimension);
System.arraycopy(packedValue.bytes, packedValue.offset + dimension * config.bytesPerDim, value, 0, config.bytesPerDim);
if (Arrays.compareUnsigned(max, 0, config.bytesPerDim, value, 0, config.bytesPerDim) < 0) {
System.arraycopy(value, 0, max, 0, config.bytesPerDim);
}
}
}
return max;
}
private byte[] getMaxDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] maxDim, int splitDim) throws IOException {
byte[] max = new byte[(dataDims - indexDims) * bytesPerDimension];
private byte[] getMaxDataDimension(BKDConfig config, BKDRadixSelector.PathSlice p, byte[] maxDim, int splitDim) throws IOException {
final int numDataDims = config.numDims - config.numIndexDims;
byte[] max = new byte[numDataDims * config.bytesPerDim];
Arrays.fill(max, (byte) 0);
int offset = splitDim * bytesPerDimension;
int offset = splitDim * config.bytesPerDim;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
byte[] value = new byte[numDataDims * config.bytesPerDim];
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
if (Arrays.mismatch(maxDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
if (Arrays.compareUnsigned(max, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) < 0) {
System.arraycopy(value, 0, max, 0, (dataDims - indexDims) * bytesPerDimension);
if (Arrays.mismatch(maxDim, 0, config.bytesPerDim, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + config.bytesPerDim) == -1) {
System.arraycopy(packedValue.bytes, packedValue.offset + config.packedIndexBytesLength, value, 0, numDataDims * config.bytesPerDim);
if (Arrays.compareUnsigned(max, 0, numDataDims * config.bytesPerDim, value, 0, numDataDims * config.bytesPerDim) < 0) {
System.arraycopy(value, 0, max, 0, numDataDims * config.bytesPerDim);
}
}
}
@ -376,16 +365,16 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return max;
}
private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
private int getMaxDocId(BKDConfig config, BKDRadixSelector.PathSlice p, int dimension, byte[] partitionPoint, byte[] dataDim) throws IOException {
int docID = Integer.MIN_VALUE;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
int offset = dimension * bytesPerDimension;
int dataOffset = indexDims * bytesPerDimension;
int dataLength = (dataDims - indexDims) * bytesPerDimension;
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
int offset = dimension * config.bytesPerDim;
int dataOffset = config.packedIndexBytesLength;
int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
if (Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + config.bytesPerDim, partitionPoint, 0, config.bytesPerDim) == 0
&& Arrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
int newDocID = pointValue.docID();
if (newDocID > docID) {
@ -396,4 +385,12 @@ public class TestBKDRadixSelector extends LuceneTestCase {
}
return docID;
}
private BKDConfig getRandomConfig() {
int numIndexDims = TestUtil.nextInt(random(), 1, BKDConfig.MAX_INDEX_DIMS);
int numDims = TestUtil.nextInt(random(), numIndexDims, BKDConfig.MAX_DIMS);
int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 2000);
return new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
}
}

View File

@ -28,43 +28,34 @@ import org.apache.lucene.util.TestUtil;
public class TestBKDRadixSort extends LuceneTestCase {
public void testRandom() throws IOException {
int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
int packedBytesLength = dataDimensions * bytesPerDim;
HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
byte[] value = new byte[packedBytesLength];
BKDConfig config = getRandomConfig();
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
HeapPointWriter points = new HeapPointWriter(config, numPoints);
byte[] value = new byte[config.packedBytesLength];
for (int i = 0; i < numPoints; i++) {
random().nextBytes(value);
points.append(value, i);
}
verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
verifySort(config, points, 0, numPoints);
}
public void testRandomAllEquals() throws IOException {
int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
int packedBytesLength = dataDimensions * bytesPerDim;
HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
byte[] value = new byte[packedBytesLength];
BKDConfig config = getRandomConfig();
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
HeapPointWriter points = new HeapPointWriter(config, numPoints);
byte[] value = new byte[config.packedBytesLength];
random().nextBytes(value);
for (int i = 0; i < numPoints; i++) {
points.append(value, random().nextInt(numPoints));
}
verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
verifySort(config, points, 0, numPoints);
}
public void testRandomLastByteTwoValues() throws IOException {
int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
int packedBytesLength = dataDimensions * bytesPerDim;
HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
byte[] value = new byte[packedBytesLength];
BKDConfig config = getRandomConfig();
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
HeapPointWriter points = new HeapPointWriter(config, numPoints);
byte[] value = new byte[config.packedBytesLength];
random().nextBytes(value);
for (int i = 0; i < numPoints; i++) {
if (random().nextBoolean()) {
@ -73,71 +64,64 @@ public class TestBKDRadixSort extends LuceneTestCase {
points.append(value, 2);
}
}
verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
verifySort(config, points, 0, numPoints);
}
public void testRandomFewDifferentValues() throws IOException {
int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
int packedBytesLength = dataDimensions * bytesPerDim;
HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
BKDConfig config = getRandomConfig();
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
HeapPointWriter points = new HeapPointWriter(config, numPoints);
int numberValues = random().nextInt(8) + 2;
byte[][] differentValues = new byte[numberValues][packedBytesLength];
byte[][] differentValues = new byte[numberValues][config.packedBytesLength];
for (int i = 0; i < numberValues; i++) {
random().nextBytes(differentValues[i]);
}
for (int i = 0; i < numPoints; i++) {
points.append(differentValues[random().nextInt(numberValues)], i);
}
verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
verifySort(config, points, 0, numPoints);
}
public void testRandomDataDimDifferent() throws IOException {
int numPoints = TestUtil.nextInt(random(), 1, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
int indexDimensions = TestUtil.nextInt(random(), 1, 8);
int dataDimensions = TestUtil.nextInt(random(), indexDimensions, 8);
int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
int packedBytesLength = dataDimensions * bytesPerDim;
HeapPointWriter points = new HeapPointWriter(numPoints, packedBytesLength);
byte[] value = new byte[packedBytesLength];
int totalDataDimension = dataDimensions - indexDimensions;
byte[] dataDimensionValues = new byte[totalDataDimension * bytesPerDim];
BKDConfig config = getRandomConfig();
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
HeapPointWriter points = new HeapPointWriter(config, numPoints);
byte[] value = new byte[config.packedBytesLength];
int totalDataDimension = config.numDims - config.numIndexDims;
byte[] dataDimensionValues = new byte[totalDataDimension * config.bytesPerDim];
random().nextBytes(value);
for (int i = 0; i < numPoints; i++) {
random().nextBytes(dataDimensionValues);
System.arraycopy(dataDimensionValues, 0, value, indexDimensions * bytesPerDim, totalDataDimension * bytesPerDim);
System.arraycopy(dataDimensionValues, 0, value, config.packedIndexBytesLength, totalDataDimension * config.bytesPerDim);
points.append(value, random().nextInt(numPoints));
}
verifySort(points, dataDimensions, indexDimensions, 0, numPoints, bytesPerDim);
verifySort(config, points, 0, numPoints);
}
private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{
int packedBytesLength = dataDimensions * bytesPerDim;
private void verifySort(BKDConfig config, HeapPointWriter points, int start, int end) throws IOException {
Directory dir = newDirectory();
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDim, 1000, dir, "test");
BKDRadixSelector radixSelector = new BKDRadixSelector(config, 1000, dir, "test");
// we check for each dimension
for (int splitDim = 0; splitDim < dataDimensions; splitDim++) {
radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim));
byte[] previous = new byte[bytesPerDim * dataDimensions];
for (int splitDim = 0; splitDim < config.numDims; splitDim++) {
radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(config, points, start, end, splitDim));
byte[] previous = new byte[config.packedBytesLength];
int previousDocId = -1;
Arrays.fill(previous, (byte) 0);
int dimOffset = splitDim * bytesPerDim;
int dimOffset = splitDim * config.bytesPerDim;
for (int j = start; j < end; j++) {
PointValue pointValue = points.getPackedValueSlice(j);
BytesRef value = pointValue.packedValue();
int cmp = Arrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
int cmp = Arrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + config.bytesPerDim, previous, dimOffset, dimOffset + config.bytesPerDim);
assertTrue(cmp >= 0);
if (cmp == 0) {
int dataOffset = indexDimensions * bytesPerDim;
cmp = Arrays.compareUnsigned(value.bytes, value.offset + dataOffset, value.offset + packedBytesLength, previous, dataOffset, packedBytesLength);
int dataOffset = config.numIndexDims * config.bytesPerDim;
cmp = Arrays.compareUnsigned(value.bytes, value.offset + dataOffset, value.offset + config.packedBytesLength, previous, dataOffset, config.packedBytesLength);
assertTrue(cmp >= 0);
}
if (cmp == 0) {
assertTrue(pointValue.docID() >= previousDocId);
}
System.arraycopy(value.bytes, value.offset, previous, 0, packedBytesLength);
System.arraycopy(value.bytes, value.offset, previous, 0, config.packedBytesLength);
previousDocId = pointValue.docID();
}
}
@ -145,17 +129,17 @@ public class TestBKDRadixSort extends LuceneTestCase {
}
/** returns a common prefix length equal or lower than the current one */
private int getRandomCommonPrefix(HeapPointWriter points, int start, int end, int bytesPerDimension, int sortDim) {
int commonPrefixLength = bytesPerDimension;
private int getRandomCommonPrefix(BKDConfig config, HeapPointWriter points, int start, int end, int sortDim) {
int commonPrefixLength = config.bytesPerDim;
PointValue value = points.getPackedValueSlice(start);
BytesRef bytesRef = value.packedValue();
byte[] firstValue = new byte[bytesPerDimension];
int offset = sortDim * bytesPerDimension;
System.arraycopy(bytesRef.bytes, bytesRef.offset + offset, firstValue, 0, bytesPerDimension);
byte[] firstValue = new byte[config.bytesPerDim];
int offset = sortDim * config.bytesPerDim;
System.arraycopy(bytesRef.bytes, bytesRef.offset + offset, firstValue, 0, config.bytesPerDim);
for (int i = start + 1; i < end; i++) {
value = points.getPackedValueSlice(i);
bytesRef = value.packedValue();
int diff = Arrays.mismatch(bytesRef.bytes, bytesRef.offset + offset, bytesRef.offset + offset + bytesPerDimension, firstValue, 0, bytesPerDimension);
int diff = Arrays.mismatch(bytesRef.bytes, bytesRef.offset + offset, bytesRef.offset + offset + config.bytesPerDim, firstValue, 0, config.bytesPerDim);
if (diff != -1 && commonPrefixLength > diff) {
if (diff == 0) {
return diff;
@ -165,4 +149,12 @@ public class TestBKDRadixSort extends LuceneTestCase {
}
return (random().nextBoolean()) ? commonPrefixLength : random().nextInt(commonPrefixLength);
}
private BKDConfig getRandomConfig() {
int numIndexDims = TestUtil.nextInt(random(), 1, BKDConfig.MAX_INDEX_DIMS);
int numDims = TestUtil.nextInt(random(), numIndexDims, BKDConfig.MAX_DIMS);
int bytesPerDim = TestUtil.nextInt(random(), 2, 30);
int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 2000);
return new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
}
}

View File

@ -37,9 +37,10 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
private void doTestSort() {
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(1, 1, bytesPerDim, maxDoc, new int[1]);
BKDConfig config = new BKDConfig(1, 1, bytesPerDim, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
Point[] points = createRandomPoints(config, maxDoc, new int[1]);
DummyPointsReader reader = new DummyPointsReader(points);
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
MutablePointsReaderUtils.sort(config, maxDoc, reader, 0, points.length);
Arrays.sort(points, new Comparator<Point>() {
@Override
public int compare(Point o1, Point o2) {
@ -61,24 +62,22 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
private void doTestSortByDim() {
final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
BKDConfig config = createRandomConfig();
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
int[] commonPrefixLengths = new int[numDataDims];
Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
int[] commonPrefixLengths = new int[config.numDims];
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
DummyPointsReader reader = new DummyPointsReader(points);
final int sortedDim = random().nextInt(numIndexDims);
MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
final int sortedDim = random().nextInt(config.numIndexDims);
MutablePointsReaderUtils.sortByDim(config, sortedDim, commonPrefixLengths, reader, 0, points.length,
new BytesRef(), new BytesRef());
for (int i = 1; i < points.length; ++i) {
final int offset = sortedDim * bytesPerDim;
final int offset = sortedDim * config.bytesPerDim;
BytesRef previousValue = reader.points[i-1].packedValue;
BytesRef currentValue = reader.points[i].packedValue;
int cmp = Arrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim);
int cmp = Arrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + config.bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + config.bytesPerDim);
if (cmp == 0) {
int dataDimOffset = numIndexDims * bytesPerDim;
int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
int dataDimOffset = config.packedIndexBytesLength;
int dataDimsLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
cmp = Arrays.compareUnsigned(previousValue.bytes, previousValue.offset + dataDimOffset, previousValue.offset + dataDimOffset + dataDimsLength,
currentValue.bytes, currentValue.offset + dataDimOffset, currentValue.offset + dataDimOffset + dataDimsLength);
if (cmp == 0) {
@ -96,26 +95,24 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
private void doTestPartition() {
final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
int[] commonPrefixLengths = new int[numDataDims];
BKDConfig config = createRandomConfig();
int[] commonPrefixLengths = new int[config.numDims];
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
final int splitDim = random().nextInt(numIndexDims);
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
final int splitDim = random().nextInt(config.numIndexDims);
DummyPointsReader reader = new DummyPointsReader(points);
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLengths[splitDim], reader, 0, points.length, pivot,
MutablePointsReaderUtils.partition(config, maxDoc, splitDim, commonPrefixLengths[splitDim], reader, 0, points.length, pivot,
new BytesRef(), new BytesRef());
BytesRef pivotValue = reader.points[pivot].packedValue;
int offset = splitDim * bytesPerDim;
int offset = splitDim * config.bytesPerDim;
for (int i = 0; i < points.length; ++i) {
BytesRef value = reader.points[i].packedValue;
int cmp = Arrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim,
pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
int cmp = Arrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + config.bytesPerDim,
pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + config.bytesPerDim);
if (cmp == 0) {
int dataDimOffset = numIndexDims * bytesPerDim;
int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
int dataDimOffset = config.packedIndexBytesLength;
int dataDimsLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
cmp = Arrays.compareUnsigned(value.bytes, value.offset + dataDimOffset, value.offset + dataDimOffset + dataDimsLength,
pivotValue.bytes, pivotValue.offset + dataDimOffset, pivotValue.offset + dataDimOffset + dataDimsLength);
if (cmp == 0) {
@ -132,50 +129,58 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
}
private static Point[] createRandomPoints(int numDataDims, int numIndexdims, int bytesPerDim, int maxDoc, int[] commonPrefixLengths) {
assertTrue(commonPrefixLengths.length == numDataDims);
final int packedBytesLength = numDataDims * bytesPerDim;
private static BKDConfig createRandomConfig() {
final int numIndexDims = TestUtil.nextInt(random(), 1, BKDConfig.MAX_INDEX_DIMS);
final int numDims = TestUtil.nextInt(random(), numIndexDims, BKDConfig.MAX_DIMS);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 2000);
return new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
}
private static Point[] createRandomPoints(BKDConfig config, int maxDoc, int[] commonPrefixLengths) {
assertTrue(commonPrefixLengths.length == config.numDims);
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
Point[] points = new Point[numPoints];
if (random().nextInt(5) != 0) {
for (int i = 0; i < numPoints; ++i) {
byte[] value = new byte[packedBytesLength];
byte[] value = new byte[config.packedBytesLength];
random().nextBytes(value);
points[i] = new Point(value, random().nextInt(maxDoc));
}
for (int i = 0; i < numDataDims; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
for (int i = 0; i < config.numDims; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = 0; dim < numDataDims; ++dim) {
int offset = dim * bytesPerDim;
for (int dim = 0; dim < config.numDims; ++dim) {
int offset = dim * config.bytesPerDim;
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}
}
} else {
// index dims are identical across all points, only the data dims differ
byte[] indexDims = new byte[numIndexdims * bytesPerDim];
int numDataDims = config.numDims - config.numIndexDims;
byte[] indexDims = new byte[config.packedIndexBytesLength];
random().nextBytes(indexDims);
byte[] dataDims = new byte[(numDataDims - numIndexdims) * bytesPerDim];
byte[] dataDims = new byte[numDataDims * config.bytesPerDim];
for (int i = 0; i < numPoints; ++i) {
byte[] value = new byte[packedBytesLength];
System.arraycopy(indexDims, 0, value, 0, numIndexdims * bytesPerDim);
byte[] value = new byte[config.packedBytesLength];
System.arraycopy(indexDims, 0, value, 0, config.packedIndexBytesLength);
random().nextBytes(dataDims);
System.arraycopy(dataDims, 0, value, numIndexdims * bytesPerDim, (numDataDims - numIndexdims) * bytesPerDim);
System.arraycopy(dataDims, 0, value, config.packedIndexBytesLength, numDataDims * config.bytesPerDim);
points[i] = new Point(value, random().nextInt(maxDoc));
}
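// The index dimensions are identical across all points, so their common prefix spans the
// whole dimension; the data dimensions get a random prefix length.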
for (int i = 0; i < numIndexdims; ++i) {
commonPrefixLengths[i] = bytesPerDim;
for (int i = 0; i < config.numIndexDims; ++i) {
commonPrefixLengths[i] = config.bytesPerDim;
}
for (int i = numDataDims; i < numDataDims; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
for (int i = config.numIndexDims; i < config.numDims; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = numIndexdims; dim < numDataDims; ++dim) {
int offset = dim * bytesPerDim;
for (int dim = config.numIndexDims; dim < config.numDims; ++dim) {
int offset = dim * config.bytesPerDim;
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}

View File

@ -49,6 +49,7 @@ import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.bkd.BKDConfig;
import org.apache.lucene.util.bkd.BKDWriter;
/**
@ -105,13 +106,16 @@ public class RandomCodec extends AssertingCodec {
PointValues values = reader.getValues(fieldInfo.name);
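// Gather the per-field point parameters into a single BKDConfig for the writer.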
BKDConfig config = new BKDConfig(fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode);
try (BKDWriter writer = new RandomlySplittingBKDWriter(writeState.segmentInfo.maxDoc(),
writeState.directory,
writeState.segmentInfo.name,
fieldInfo.getPointDimensionCount(),
fieldInfo.getPointIndexDimensionCount(),
fieldInfo.getPointNumBytes(),
maxPointsInLeafNode,
config,
maxMBSortInHeap,
values.size(),
bkdSplitRandomSeed ^ fieldInfo.name.hashCode())) {
@ -256,10 +260,9 @@ public class RandomCodec extends AssertingCodec {
final Random random;
public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDataDims, int numIndexDims,
int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap,
public RandomlySplittingBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, BKDConfig config, double maxMBSortInHeap,
long totalPointCount, int randomSeed) throws IOException {
super(maxDoc, tempDir, tempFileNamePrefix, numDataDims, numIndexDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount);
super(maxDoc, tempDir, tempFileNamePrefix, config, maxMBSortInHeap, totalPointCount);
this.random = new Random(randomSeed);
}
@ -284,7 +287,7 @@ public class RandomCodec extends AssertingCodec {
@Override
protected int split(byte[] minPackedValue, byte[] maxPackedValue, int[] parentDims) {
// BKD normally splits on the widest dimension, to try to make the cells as squarish as possible, but here we just pick a random one ;)
return random.nextInt(numIndexDims);
return random.nextInt(config.numIndexDims);
}
}
}