mirror of https://github.com/apache/lucene.git
Convert BKDConfig to a record (#13668)
This commit is contained in:
parent
0533effe48
commit
4404aa3fe9
|
@ -642,13 +642,13 @@ public class BKDWriter60 implements Closeable {
|
|||
throws IOException {
|
||||
assert docMaps == null || readers.size() == docMaps.size();
|
||||
|
||||
BKDMergeQueue queue = new BKDMergeQueue(config.bytesPerDim, readers.size());
|
||||
BKDMergeQueue queue = new BKDMergeQueue(config.bytesPerDim(), readers.size());
|
||||
|
||||
for (int i = 0; i < readers.size(); i++) {
|
||||
PointValues pointValues = readers.get(i);
|
||||
assert pointValues.getNumDimensions() == config.numDims
|
||||
&& pointValues.getBytesPerDimension() == config.bytesPerDim
|
||||
&& pointValues.getNumIndexDimensions() == config.numIndexDims;
|
||||
assert pointValues.getNumDimensions() == config.numDims()
|
||||
&& pointValues.getBytesPerDimension() == config.bytesPerDim()
|
||||
&& pointValues.getNumIndexDimensions() == config.numIndexDims();
|
||||
MergeState.DocMap docMap;
|
||||
if (docMaps == null) {
|
||||
docMap = null;
|
||||
|
|
|
@ -71,8 +71,8 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
this.pointCount = pointCount;
|
||||
this.docCount = docCount;
|
||||
this.version = SimpleTextBKDWriter.VERSION_CURRENT;
|
||||
assert minPackedValue.length == config.packedIndexBytesLength;
|
||||
assert maxPackedValue.length == config.packedIndexBytesLength;
|
||||
assert minPackedValue.length == config.packedIndexBytesLength();
|
||||
assert maxPackedValue.length == config.packedIndexBytesLength();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -99,8 +99,8 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
private SimpleTextPointTree(
|
||||
IndexInput in, int nodeID, int level, byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
this.in = in;
|
||||
this.scratchDocIDs = new int[config.maxPointsInLeafNode];
|
||||
this.scratchPackedValue = new byte[config.packedBytesLength];
|
||||
this.scratchDocIDs = new int[config.maxPointsInLeafNode()];
|
||||
this.scratchPackedValue = new byte[config.packedBytesLength()];
|
||||
this.nodeID = nodeID;
|
||||
this.rootNode = nodeID;
|
||||
this.level = level;
|
||||
|
@ -145,38 +145,39 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
private void pushLeft() {
|
||||
int address = nodeID * bytesPerIndexEntry;
|
||||
// final int splitDimPos;
|
||||
if (config.numIndexDims == 1) {
|
||||
if (config.numIndexDims() == 1) {
|
||||
splitDims[level] = 0;
|
||||
} else {
|
||||
splitDims[level] = (splitPackedValues[address++] & 0xff);
|
||||
}
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim;
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim();
|
||||
if (splitDimValueStack[level] == null) {
|
||||
splitDimValueStack[level] = new byte[config.bytesPerDim];
|
||||
splitDimValueStack[level] = new byte[config.bytesPerDim()];
|
||||
}
|
||||
// save the dimension we are going to change
|
||||
System.arraycopy(
|
||||
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
|
||||
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
|
||||
assert Arrays.compareUnsigned(
|
||||
maxPackedValue,
|
||||
splitDimPos,
|
||||
splitDimPos + config.bytesPerDim,
|
||||
splitDimPos + config.bytesPerDim(),
|
||||
splitPackedValues,
|
||||
address,
|
||||
address + config.bytesPerDim)
|
||||
address + config.bytesPerDim())
|
||||
>= 0
|
||||
: "config.bytesPerDim="
|
||||
+ config.bytesPerDim
|
||||
: "config.bytesPerDim()="
|
||||
+ config.bytesPerDim()
|
||||
+ " splitDim="
|
||||
+ splitDims[level]
|
||||
+ " config.numIndexDims="
|
||||
+ config.numIndexDims
|
||||
+ " config.numIndexDims()="
|
||||
+ config.numIndexDims()
|
||||
+ " config.numDims="
|
||||
+ config.numDims;
|
||||
+ config.numDims();
|
||||
nodeID *= 2;
|
||||
level++;
|
||||
// add the split dim value:
|
||||
System.arraycopy(splitPackedValues, address, maxPackedValue, splitDimPos, config.bytesPerDim);
|
||||
System.arraycopy(
|
||||
splitPackedValues, address, maxPackedValue, splitDimPos, config.bytesPerDim());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -191,37 +192,38 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
|
||||
private void pushRight() {
|
||||
int address = nodeID * bytesPerIndexEntry;
|
||||
if (config.numIndexDims == 1) {
|
||||
if (config.numIndexDims() == 1) {
|
||||
splitDims[level] = 0;
|
||||
} else {
|
||||
splitDims[level] = (splitPackedValues[address++] & 0xff);
|
||||
}
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim;
|
||||
final int splitDimPos = splitDims[level] * config.bytesPerDim();
|
||||
// we should have already visited the left node
|
||||
assert splitDimValueStack[level] != null;
|
||||
// save the dimension we are going to change
|
||||
System.arraycopy(
|
||||
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
|
||||
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
|
||||
assert Arrays.compareUnsigned(
|
||||
minPackedValue,
|
||||
splitDimPos,
|
||||
splitDimPos + config.bytesPerDim,
|
||||
splitDimPos + config.bytesPerDim(),
|
||||
splitPackedValues,
|
||||
address,
|
||||
address + config.bytesPerDim)
|
||||
address + config.bytesPerDim())
|
||||
<= 0
|
||||
: "config.bytesPerDim="
|
||||
+ config.bytesPerDim
|
||||
: "config.bytesPerDim()="
|
||||
+ config.bytesPerDim()
|
||||
+ " splitDim="
|
||||
+ splitDims[level]
|
||||
+ " config.numIndexDims="
|
||||
+ config.numIndexDims
|
||||
+ " config.numIndexDims()="
|
||||
+ config.numIndexDims()
|
||||
+ " config.numDims="
|
||||
+ config.numDims;
|
||||
+ config.numDims();
|
||||
nodeID = 2 * nodeID + 1;
|
||||
level++;
|
||||
// add the split dim value:
|
||||
System.arraycopy(splitPackedValues, address, minPackedValue, splitDimPos, config.bytesPerDim);
|
||||
System.arraycopy(
|
||||
splitPackedValues, address, minPackedValue, splitDimPos, config.bytesPerDim());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -242,16 +244,16 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
splitDimValueStack[level],
|
||||
0,
|
||||
maxPackedValue,
|
||||
splitDims[level] * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDims[level] * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
} else {
|
||||
|
||||
System.arraycopy(
|
||||
splitDimValueStack[level],
|
||||
0,
|
||||
minPackedValue,
|
||||
splitDims[level] * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDims[level] * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -290,7 +292,7 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
private long sizeFromBalancedTree(int leftMostLeafNode, int rightMostLeafNode) {
|
||||
// number of points that need to be distributed between leaves, one per leaf
|
||||
final int extraPoints =
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode * leafNodeOffset) - pointCount);
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode() * leafNodeOffset) - pointCount);
|
||||
assert extraPoints < leafNodeOffset : "point excess should be lower than leafNodeOffset";
|
||||
// offset where we stop adding one point to the leaves
|
||||
final int nodeOffset = leafNodeOffset - extraPoints;
|
||||
|
@ -298,9 +300,9 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
for (int node = leftMostLeafNode; node <= rightMostLeafNode; node++) {
|
||||
// offsetPosition provides which extra point will be added to this node
|
||||
if (balanceTreeNodePosition(0, leafNodeOffset, node - leafNodeOffset, 0, 0) < nodeOffset) {
|
||||
count += config.maxPointsInLeafNode;
|
||||
count += config.maxPointsInLeafNode();
|
||||
} else {
|
||||
count += config.maxPointsInLeafNode - 1;
|
||||
count += config.maxPointsInLeafNode() - 1;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
|
@ -376,14 +378,14 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
// Again, this time reading values and checking with the visitor
|
||||
visitor.grow(count);
|
||||
// NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
|
||||
assert scratchPackedValue.length == config.packedBytesLength;
|
||||
assert scratchPackedValue.length == config.packedBytesLength();
|
||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||
for (int i = 0; i < count; i++) {
|
||||
readLine(in, scratch);
|
||||
assert startsWith(scratch, BLOCK_VALUE);
|
||||
BytesRef br = SimpleTextUtil.fromBytesRefString(stripPrefix(scratch, BLOCK_VALUE));
|
||||
assert br.length == config.packedBytesLength;
|
||||
System.arraycopy(br.bytes, br.offset, scratchPackedValue, 0, config.packedBytesLength);
|
||||
assert br.length == config.packedBytesLength();
|
||||
System.arraycopy(br.bytes, br.offset, scratchPackedValue, 0, config.packedBytesLength());
|
||||
visitor.visit(scratchDocIDs[i], scratchPackedValue);
|
||||
}
|
||||
} else {
|
||||
|
@ -443,17 +445,17 @@ final class SimpleTextBKDReader extends PointValues {
|
|||
|
||||
@Override
|
||||
public int getNumDimensions() throws IOException {
|
||||
return config.numDims;
|
||||
return config.numDims();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumIndexDimensions() throws IOException {
|
||||
return config.numIndexDims;
|
||||
return config.numIndexDims();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension() throws IOException {
|
||||
return config.bytesPerDim;
|
||||
return config.bytesPerDim();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -144,28 +144,28 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
this.maxDoc = maxDoc;
|
||||
docsSeen = new FixedBitSet(maxDoc);
|
||||
|
||||
scratchDiff = new byte[config.bytesPerDim];
|
||||
scratch1 = new byte[config.packedBytesLength];
|
||||
scratch2 = new byte[config.packedBytesLength];
|
||||
commonPrefixLengths = new int[config.numDims];
|
||||
scratchDiff = new byte[config.bytesPerDim()];
|
||||
scratch1 = new byte[config.packedBytesLength()];
|
||||
scratch2 = new byte[config.packedBytesLength()];
|
||||
commonPrefixLengths = new int[config.numDims()];
|
||||
|
||||
minPackedValue = new byte[config.packedIndexBytesLength];
|
||||
maxPackedValue = new byte[config.packedIndexBytesLength];
|
||||
minPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
maxPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
|
||||
// Maximum number of points we hold in memory at any time
|
||||
maxPointsSortInHeap =
|
||||
(int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc * config.numDims));
|
||||
(int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc() * config.numDims()));
|
||||
|
||||
// Finally, we must be able to hold at least the leaf node in heap during build:
|
||||
if (maxPointsSortInHeap < config.maxPointsInLeafNode) {
|
||||
if (maxPointsSortInHeap < config.maxPointsInLeafNode()) {
|
||||
throw new IllegalArgumentException(
|
||||
"maxMBSortInHeap="
|
||||
+ maxMBSortInHeap
|
||||
+ " only allows for maxPointsSortInHeap="
|
||||
+ maxPointsSortInHeap
|
||||
+ ", but this is less than config.maxPointsInLeafNode="
|
||||
+ config.maxPointsInLeafNode
|
||||
+ "; either increase maxMBSortInHeap or decrease config.maxPointsInLeafNode");
|
||||
+ ", but this is less than config.maxPointsInLeafNode()="
|
||||
+ config.maxPointsInLeafNode()
|
||||
+ "; either increase maxMBSortInHeap or decrease config.maxPointsInLeafNode()");
|
||||
}
|
||||
|
||||
this.maxMBSortInHeap = maxMBSortInHeap;
|
||||
|
@ -183,10 +183,10 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
public void add(byte[] packedValue, int docID) throws IOException {
|
||||
if (packedValue.length != config.packedBytesLength) {
|
||||
if (packedValue.length != config.packedBytesLength()) {
|
||||
throw new IllegalArgumentException(
|
||||
"packedValue should be length="
|
||||
+ config.packedBytesLength
|
||||
+ config.packedBytesLength()
|
||||
+ " (got: "
|
||||
+ packedValue.length
|
||||
+ ")");
|
||||
|
@ -209,30 +209,30 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
} else {
|
||||
pointWriter = new HeapPointWriter(config, Math.toIntExact(totalPointCount));
|
||||
}
|
||||
System.arraycopy(packedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(packedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(packedValue, 0, minPackedValue, 0, config.packedIndexBytesLength());
|
||||
System.arraycopy(packedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength());
|
||||
} else {
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim,
|
||||
offset + config.bytesPerDim(),
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
< 0) {
|
||||
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim);
|
||||
System.arraycopy(packedValue, offset, minPackedValue, offset, config.bytesPerDim());
|
||||
}
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim,
|
||||
offset + config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim);
|
||||
System.arraycopy(packedValue, offset, maxPackedValue, offset, config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -254,7 +254,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
*/
|
||||
public long writeField(IndexOutput out, String fieldName, MutablePointTree reader)
|
||||
throws IOException {
|
||||
if (config.numIndexDims == 1) {
|
||||
if (config.numIndexDims() == 1) {
|
||||
return writeField1Dim(out, fieldName, reader);
|
||||
} else {
|
||||
return writeFieldNDims(out, fieldName, reader);
|
||||
|
@ -280,7 +280,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
long countPerLeaf = pointCount = values.size();
|
||||
long innerNodeCount = 1;
|
||||
|
||||
while (countPerLeaf > config.maxPointsInLeafNode) {
|
||||
while (countPerLeaf > config.maxPointsInLeafNode()) {
|
||||
countPerLeaf = (countPerLeaf + 1) / 2;
|
||||
innerNodeCount *= 2;
|
||||
}
|
||||
|
@ -289,7 +289,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
checkMaxLeafNodeCount(numLeaves);
|
||||
|
||||
final byte[] splitPackedValues = new byte[numLeaves * (config.bytesPerDim + 1)];
|
||||
final byte[] splitPackedValues = new byte[numLeaves * (config.bytesPerDim() + 1)];
|
||||
final long[] leafBlockFPs = new long[numLeaves];
|
||||
|
||||
// compute the min/max for this slice
|
||||
|
@ -297,37 +297,37 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
Arrays.fill(maxPackedValue, (byte) 0);
|
||||
for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
|
||||
values.getValue(i, scratchBytesRef1);
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
if (Arrays.compareUnsigned(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim(),
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
< 0) {
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
minPackedValue,
|
||||
offset,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
}
|
||||
if (Arrays.compareUnsigned(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim,
|
||||
scratchBytesRef1.offset + offset + config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + offset,
|
||||
maxPackedValue,
|
||||
offset,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -345,7 +345,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
maxPackedValue,
|
||||
splitPackedValues,
|
||||
leafBlockFPs,
|
||||
new int[config.maxPointsInLeafNode]);
|
||||
new int[config.maxPointsInLeafNode()]);
|
||||
|
||||
long indexFP = out.getFilePointer();
|
||||
writeIndex(out, leafBlockFPs, splitPackedValues, Math.toIntExact(countPerLeaf));
|
||||
|
@ -387,15 +387,15 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final IndexOutput out;
|
||||
final List<Long> leafBlockFPs = new ArrayList<>();
|
||||
final List<byte[]> leafBlockStartValues = new ArrayList<>();
|
||||
final byte[] leafValues = new byte[config.maxPointsInLeafNode * config.packedBytesLength];
|
||||
final int[] leafDocs = new int[config.maxPointsInLeafNode];
|
||||
final byte[] leafValues = new byte[config.maxPointsInLeafNode() * config.packedBytesLength()];
|
||||
final int[] leafDocs = new int[config.maxPointsInLeafNode()];
|
||||
long valueCount;
|
||||
int leafCount;
|
||||
|
||||
OneDimensionBKDWriter(IndexOutput out) {
|
||||
if (config.numIndexDims != 1) {
|
||||
if (config.numIndexDims() != 1) {
|
||||
throw new UnsupportedOperationException(
|
||||
"config.numIndexDims must be 1 but got " + config.numIndexDims);
|
||||
"config.numIndexDims() must be 1 but got " + config.numIndexDims());
|
||||
}
|
||||
if (pointCount != 0) {
|
||||
throw new IllegalStateException("cannot mix add and merge");
|
||||
|
@ -411,7 +411,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
this.out = out;
|
||||
|
||||
lastPackedValue = new byte[config.packedBytesLength];
|
||||
lastPackedValue = new byte[config.packedBytesLength()];
|
||||
}
|
||||
|
||||
// for asserts
|
||||
|
@ -426,8 +426,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
packedValue,
|
||||
0,
|
||||
leafValues,
|
||||
leafCount * config.packedBytesLength,
|
||||
config.packedBytesLength);
|
||||
leafCount * config.packedBytesLength(),
|
||||
config.packedBytesLength());
|
||||
leafDocs[leafCount] = docID;
|
||||
docsSeen.set(docID);
|
||||
leafCount++;
|
||||
|
@ -441,7 +441,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
+ " values");
|
||||
}
|
||||
|
||||
if (leafCount == config.maxPointsInLeafNode) {
|
||||
if (leafCount == config.maxPointsInLeafNode()) {
|
||||
// We write a block once we hit exactly the max count ... this is different from
|
||||
// when we flush a new segment, where we write between max/2 and max per leaf block,
|
||||
// so merged segments will behave differently from newly flushed segments:
|
||||
|
@ -471,43 +471,44 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
// System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts="
|
||||
// + leafBlockStartValues.size());
|
||||
|
||||
byte[] index = new byte[(1 + numInnerNodes) * (1 + config.bytesPerDim)];
|
||||
byte[] index = new byte[(1 + numInnerNodes) * (1 + config.bytesPerDim())];
|
||||
rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
|
||||
long[] arr = new long[leafBlockFPs.size()];
|
||||
for (int i = 0; i < leafBlockFPs.size(); i++) {
|
||||
arr[i] = leafBlockFPs.get(i);
|
||||
}
|
||||
writeIndex(out, arr, index, config.maxPointsInLeafNode);
|
||||
writeIndex(out, arr, index, config.maxPointsInLeafNode());
|
||||
return indexFP;
|
||||
}
|
||||
|
||||
private void writeLeafBlock() throws IOException {
|
||||
assert leafCount != 0;
|
||||
if (valueCount == 0) {
|
||||
System.arraycopy(leafValues, 0, minPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(leafValues, 0, minPackedValue, 0, config.packedIndexBytesLength());
|
||||
}
|
||||
System.arraycopy(
|
||||
leafValues,
|
||||
(leafCount - 1) * config.packedBytesLength,
|
||||
(leafCount - 1) * config.packedBytesLength(),
|
||||
maxPackedValue,
|
||||
0,
|
||||
config.packedIndexBytesLength);
|
||||
config.packedIndexBytesLength());
|
||||
|
||||
valueCount += leafCount;
|
||||
|
||||
if (leafBlockFPs.size() > 0) {
|
||||
// Save the first (minimum) value in each leaf block except the first, to build the split
|
||||
// value index in the end:
|
||||
leafBlockStartValues.add(ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength));
|
||||
leafBlockStartValues.add(
|
||||
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength()));
|
||||
}
|
||||
leafBlockFPs.add(out.getFilePointer());
|
||||
checkMaxLeafNodeCount(leafBlockFPs.size());
|
||||
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
|
||||
// Find per-dim common prefix:
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
int offset1 = dim * config.bytesPerDim;
|
||||
int offset2 = (leafCount - 1) * config.packedBytesLength + offset1;
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
int offset1 = dim * config.bytesPerDim();
|
||||
int offset2 = (leafCount - 1) * config.packedBytesLength() + offset1;
|
||||
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
|
||||
if (leafValues[offset1 + j] != leafValues[offset2 + j]) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
|
@ -523,24 +524,24 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
{
|
||||
scratch.length = config.packedBytesLength;
|
||||
scratch.length = config.packedBytesLength();
|
||||
scratch.bytes = leafValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef apply(int i) {
|
||||
scratch.offset = config.packedBytesLength * i;
|
||||
scratch.offset = config.packedBytesLength() * i;
|
||||
return scratch;
|
||||
}
|
||||
};
|
||||
assert valuesInOrderAndBounds(
|
||||
leafCount,
|
||||
0,
|
||||
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength),
|
||||
ArrayUtil.copyOfSubArray(leafValues, 0, config.packedBytesLength()),
|
||||
ArrayUtil.copyOfSubArray(
|
||||
leafValues,
|
||||
(leafCount - 1) * config.packedBytesLength,
|
||||
leafCount * config.packedBytesLength),
|
||||
(leafCount - 1) * config.packedBytesLength(),
|
||||
leafCount * config.packedBytesLength()),
|
||||
packedValues,
|
||||
leafDocs,
|
||||
0);
|
||||
|
@ -552,7 +553,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
private void rotateToTree(
|
||||
int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
|
||||
// System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + "
|
||||
// bpd=" + config.bytesPerDim + " index.length=" + index.length);
|
||||
// bpd=" + config.bytesPerDim() + " index.length=" + index.length);
|
||||
if (count == 1) {
|
||||
// Leaf index node
|
||||
// System.out.println(" leaf index node");
|
||||
|
@ -561,8 +562,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
leafBlockStartValues.get(offset),
|
||||
0,
|
||||
index,
|
||||
nodeID * (1 + config.bytesPerDim) + 1,
|
||||
config.bytesPerDim);
|
||||
nodeID * (1 + config.bytesPerDim()) + 1,
|
||||
config.bytesPerDim());
|
||||
} else if (count > 1) {
|
||||
// Internal index node: binary partition of count
|
||||
int countAtLevel = 1;
|
||||
|
@ -587,8 +588,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
leafBlockStartValues.get(rootOffset),
|
||||
0,
|
||||
index,
|
||||
nodeID * (1 + config.bytesPerDim) + 1,
|
||||
config.bytesPerDim);
|
||||
nodeID * (1 + config.bytesPerDim()) + 1,
|
||||
config.bytesPerDim());
|
||||
// System.out.println(" index[" + nodeID + "] = blockStartValues[" + rootOffset + "]");
|
||||
|
||||
// TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree
|
||||
|
@ -611,10 +612,10 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
private void checkMaxLeafNodeCount(int numLeaves) {
|
||||
if ((1 + config.bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
|
||||
if ((1 + config.bytesPerDim()) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
|
||||
throw new IllegalStateException(
|
||||
"too many nodes; increase config.maxPointsInLeafNode (currently "
|
||||
+ config.maxPointsInLeafNode
|
||||
"too many nodes; increase config.maxPointsInLeafNode() (currently "
|
||||
+ config.maxPointsInLeafNode()
|
||||
+ ") and reindex");
|
||||
}
|
||||
}
|
||||
|
@ -652,7 +653,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
long countPerLeaf = pointCount;
|
||||
long innerNodeCount = 1;
|
||||
|
||||
while (countPerLeaf > config.maxPointsInLeafNode) {
|
||||
while (countPerLeaf > config.maxPointsInLeafNode()) {
|
||||
countPerLeaf = (countPerLeaf + 1) / 2;
|
||||
innerNodeCount *= 2;
|
||||
}
|
||||
|
@ -667,20 +668,20 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
// Indexed by nodeID, but first (root) nodeID is 1. We do 1+ because the lead byte at each
|
||||
// recursion says which dim we split on.
|
||||
byte[] splitPackedValues = new byte[Math.multiplyExact(numLeaves, 1 + config.bytesPerDim)];
|
||||
byte[] splitPackedValues = new byte[Math.multiplyExact(numLeaves, 1 + config.bytesPerDim())];
|
||||
|
||||
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g.
|
||||
// 7)
|
||||
long[] leafBlockFPs = new long[numLeaves];
|
||||
|
||||
// Make sure the math above "worked":
|
||||
assert pointCount / numLeaves <= config.maxPointsInLeafNode
|
||||
assert pointCount / numLeaves <= config.maxPointsInLeafNode()
|
||||
: "pointCount="
|
||||
+ pointCount
|
||||
+ " numLeaves="
|
||||
+ numLeaves
|
||||
+ " config.maxPointsInLeafNode="
|
||||
+ config.maxPointsInLeafNode;
|
||||
+ " config.maxPointsInLeafNode()="
|
||||
+ config.maxPointsInLeafNode();
|
||||
|
||||
// We re-use the selector so we do not need to create an object every time.
|
||||
BKDRadixSelector radixSelector =
|
||||
|
@ -699,7 +700,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
maxPackedValue,
|
||||
splitPackedValues,
|
||||
leafBlockFPs,
|
||||
new int[config.maxPointsInLeafNode]);
|
||||
new int[config.maxPointsInLeafNode()]);
|
||||
|
||||
// If no exception, we should have cleaned everything up:
|
||||
assert tempDir.getCreatedFiles().isEmpty();
|
||||
|
@ -724,15 +725,15 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues, int maxPointsInLeafNode)
|
||||
throws IOException {
|
||||
write(out, NUM_DATA_DIMS);
|
||||
writeInt(out, config.numDims);
|
||||
writeInt(out, config.numDims());
|
||||
newline(out);
|
||||
|
||||
write(out, NUM_INDEX_DIMS);
|
||||
writeInt(out, config.numIndexDims);
|
||||
writeInt(out, config.numIndexDims());
|
||||
newline(out);
|
||||
|
||||
write(out, BYTES_PER_DIM);
|
||||
writeInt(out, config.bytesPerDim);
|
||||
writeInt(out, config.bytesPerDim());
|
||||
newline(out);
|
||||
|
||||
write(out, MAX_LEAF_POINTS);
|
||||
|
@ -767,8 +768,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
newline(out);
|
||||
}
|
||||
|
||||
assert (splitPackedValues.length % (1 + config.bytesPerDim)) == 0;
|
||||
int count = splitPackedValues.length / (1 + config.bytesPerDim);
|
||||
assert (splitPackedValues.length % (1 + config.bytesPerDim())) == 0;
|
||||
int count = splitPackedValues.length / (1 + config.bytesPerDim());
|
||||
assert count == leafBlockFPs.length;
|
||||
|
||||
write(out, SPLIT_COUNT);
|
||||
|
@ -777,10 +778,12 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
for (int i = 0; i < count; i++) {
|
||||
write(out, SPLIT_DIM);
|
||||
writeInt(out, splitPackedValues[i * (1 + config.bytesPerDim)] & 0xff);
|
||||
writeInt(out, splitPackedValues[i * (1 + config.bytesPerDim())] & 0xff);
|
||||
newline(out);
|
||||
write(out, SPLIT_VALUE);
|
||||
br = new BytesRef(splitPackedValues, 1 + (i * (1 + config.bytesPerDim)), config.bytesPerDim);
|
||||
br =
|
||||
new BytesRef(
|
||||
splitPackedValues, 1 + (i * (1 + config.bytesPerDim())), config.bytesPerDim());
|
||||
write(out, br.toString());
|
||||
newline(out);
|
||||
}
|
||||
|
@ -852,25 +855,25 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
/** Called only in assert */
|
||||
private boolean valueInBounds(
|
||||
BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
int offset = config.bytesPerDim * dim;
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
int offset = config.bytesPerDim() * dim;
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim,
|
||||
packedValue.offset + offset + config.bytesPerDim(),
|
||||
minPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
< 0) {
|
||||
return false;
|
||||
}
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim,
|
||||
packedValue.offset + offset + config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
offset,
|
||||
offset + config.bytesPerDim)
|
||||
offset + config.bytesPerDim())
|
||||
> 0) {
|
||||
return false;
|
||||
}
|
||||
|
@ -882,13 +885,13 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||
// Find which dim has the largest span so we can split on it:
|
||||
int splitDim = -1;
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
NumericUtils.subtract(config.bytesPerDim, dim, maxPackedValue, minPackedValue, scratchDiff);
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
NumericUtils.subtract(config.bytesPerDim(), dim, maxPackedValue, minPackedValue, scratchDiff);
|
||||
if (splitDim == -1
|
||||
|| Arrays.compareUnsigned(
|
||||
scratchDiff, 0, config.bytesPerDim, scratch1, 0, config.bytesPerDim)
|
||||
scratchDiff, 0, config.bytesPerDim(), scratch1, 0, config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim);
|
||||
System.arraycopy(scratchDiff, 0, scratch1, 0, config.bytesPerDim());
|
||||
splitDim = dim;
|
||||
}
|
||||
}
|
||||
|
@ -931,15 +934,15 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
if (nodeID >= leafNodeOffset) {
|
||||
// leaf node
|
||||
final int count = to - from;
|
||||
assert count <= config.maxPointsInLeafNode;
|
||||
assert count <= config.maxPointsInLeafNode();
|
||||
|
||||
// Compute common prefixes
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
reader.getValue(i, scratchBytesRef2);
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
final int offset = dim * config.bytesPerDim;
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
final int offset = dim * config.bytesPerDim();
|
||||
for (int j = 0; j < commonPrefixLengths[dim]; j++) {
|
||||
if (scratchBytesRef1.bytes[scratchBytesRef1.offset + offset + j]
|
||||
!= scratchBytesRef2.bytes[scratchBytesRef2.offset + offset + j]) {
|
||||
|
@ -951,23 +954,23 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
// Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
|
||||
for (int dim = 0; dim < config.numDims; ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim) {
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
|
||||
for (int dim = 0; dim < config.numDims(); ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
|
||||
usedBytes[dim] = new FixedBitSet(256);
|
||||
}
|
||||
}
|
||||
for (int i = from + 1; i < to; ++i) {
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
if (usedBytes[dim] != null) {
|
||||
byte b = reader.getByteAt(i, dim * config.bytesPerDim + commonPrefixLengths[dim]);
|
||||
byte b = reader.getByteAt(i, dim * config.bytesPerDim() + commonPrefixLengths[dim]);
|
||||
usedBytes[dim].set(Byte.toUnsignedInt(b));
|
||||
}
|
||||
}
|
||||
}
|
||||
int sortedDim = 0;
|
||||
int sortedDimCardinality = Integer.MAX_VALUE;
|
||||
for (int dim = 0; dim < config.numDims; ++dim) {
|
||||
for (int dim = 0; dim < config.numDims(); ++dim) {
|
||||
if (usedBytes[dim] != null) {
|
||||
final int cardinality = usedBytes[dim].cardinality();
|
||||
if (cardinality < sortedDimCardinality) {
|
||||
|
@ -1001,7 +1004,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
// Write the common prefixes:
|
||||
reader.getValue(from, scratchBytesRef1);
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength);
|
||||
scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, config.packedBytesLength());
|
||||
|
||||
// Write the full values:
|
||||
IntFunction<BytesRef> packedValues =
|
||||
|
@ -1023,10 +1026,10 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final int splitDim = split(minPackedValue, maxPackedValue);
|
||||
final int mid = (from + to + 1) >>> 1;
|
||||
|
||||
int commonPrefixLen = config.bytesPerDim;
|
||||
for (int i = 0; i < config.bytesPerDim; ++i) {
|
||||
if (minPackedValue[splitDim * config.bytesPerDim + i]
|
||||
!= maxPackedValue[splitDim * config.bytesPerDim + i]) {
|
||||
int commonPrefixLen = config.bytesPerDim();
|
||||
for (int i = 0; i < config.bytesPerDim(); ++i) {
|
||||
if (minPackedValue[splitDim * config.bytesPerDim() + i]
|
||||
!= maxPackedValue[splitDim * config.bytesPerDim() + i]) {
|
||||
commonPrefixLen = i;
|
||||
break;
|
||||
}
|
||||
|
@ -1044,32 +1047,32 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
scratchBytesRef2);
|
||||
|
||||
// set the split value
|
||||
final int address = nodeID * (1 + config.bytesPerDim);
|
||||
final int address = nodeID * (1 + config.bytesPerDim());
|
||||
splitPackedValues[address] = (byte) splitDim;
|
||||
reader.getValue(mid, scratchBytesRef1);
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
|
||||
splitPackedValues,
|
||||
address + 1,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
|
||||
byte[] minSplitPackedValue =
|
||||
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength);
|
||||
ArrayUtil.copyOfSubArray(minPackedValue, 0, config.packedIndexBytesLength());
|
||||
byte[] maxSplitPackedValue =
|
||||
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength);
|
||||
ArrayUtil.copyOfSubArray(maxPackedValue, 0, config.packedIndexBytesLength());
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
|
||||
minSplitPackedValue,
|
||||
splitDim * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDim * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
System.arraycopy(
|
||||
scratchBytesRef1.bytes,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim,
|
||||
scratchBytesRef1.offset + splitDim * config.bytesPerDim(),
|
||||
maxSplitPackedValue,
|
||||
splitDim * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
splitDim * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
|
||||
// recurse
|
||||
build(
|
||||
|
@ -1137,17 +1140,17 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
|
||||
int sortedDim = 0;
|
||||
int sortedDimCardinality = Integer.MAX_VALUE;
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims];
|
||||
for (int dim = 0; dim < config.numDims; ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim) {
|
||||
FixedBitSet[] usedBytes = new FixedBitSet[config.numDims()];
|
||||
for (int dim = 0; dim < config.numDims(); ++dim) {
|
||||
if (commonPrefixLengths[dim] < config.bytesPerDim()) {
|
||||
usedBytes[dim] = new FixedBitSet(256);
|
||||
}
|
||||
}
|
||||
// Find the dimension to compress
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
int prefix = commonPrefixLengths[dim];
|
||||
if (prefix < config.bytesPerDim) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
if (prefix < config.bytesPerDim()) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
for (int i = 0; i < heapSource.count(); ++i) {
|
||||
PointValue value = heapSource.getPackedValueSlice(i);
|
||||
BytesRef packedValue = value.packedValue();
|
||||
|
@ -1190,7 +1193,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
final BytesRef scratch = new BytesRef();
|
||||
|
||||
{
|
||||
scratch.length = config.packedBytesLength;
|
||||
scratch.length = config.packedBytesLength();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1207,7 +1210,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
// Inner node: partition/recurse
|
||||
|
||||
int splitDim;
|
||||
if (config.numIndexDims > 1) {
|
||||
if (config.numIndexDims() > 1) {
|
||||
splitDim = split(minPackedValue, maxPackedValue);
|
||||
} else {
|
||||
splitDim = 0;
|
||||
|
@ -1223,13 +1226,13 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
int commonPrefixLen =
|
||||
Arrays.mismatch(
|
||||
minPackedValue,
|
||||
splitDim * config.bytesPerDim,
|
||||
splitDim * config.bytesPerDim + config.bytesPerDim,
|
||||
splitDim * config.bytesPerDim(),
|
||||
splitDim * config.bytesPerDim() + config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
splitDim * config.bytesPerDim,
|
||||
splitDim * config.bytesPerDim + config.bytesPerDim);
|
||||
splitDim * config.bytesPerDim(),
|
||||
splitDim * config.bytesPerDim() + config.bytesPerDim());
|
||||
if (commonPrefixLen == -1) {
|
||||
commonPrefixLen = config.bytesPerDim;
|
||||
commonPrefixLen = config.bytesPerDim();
|
||||
}
|
||||
|
||||
BKDRadixSelector.PathSlice[] pathSlices = new BKDRadixSelector.PathSlice[2];
|
||||
|
@ -1244,20 +1247,28 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
splitDim,
|
||||
commonPrefixLen);
|
||||
|
||||
int address = nodeID * (1 + config.bytesPerDim);
|
||||
int address = nodeID * (1 + config.bytesPerDim());
|
||||
splitPackedValues[address] = (byte) splitDim;
|
||||
System.arraycopy(splitValue, 0, splitPackedValues, address + 1, config.bytesPerDim);
|
||||
System.arraycopy(splitValue, 0, splitPackedValues, address + 1, config.bytesPerDim());
|
||||
|
||||
byte[] minSplitPackedValue = new byte[config.packedIndexBytesLength];
|
||||
System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, config.packedIndexBytesLength);
|
||||
byte[] minSplitPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, config.packedIndexBytesLength());
|
||||
|
||||
byte[] maxSplitPackedValue = new byte[config.packedIndexBytesLength];
|
||||
System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, config.packedIndexBytesLength);
|
||||
byte[] maxSplitPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, config.packedIndexBytesLength());
|
||||
|
||||
System.arraycopy(
|
||||
splitValue, 0, minSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
|
||||
splitValue,
|
||||
0,
|
||||
minSplitPackedValue,
|
||||
splitDim * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
System.arraycopy(
|
||||
splitValue, 0, maxSplitPackedValue, splitDim * config.bytesPerDim, config.bytesPerDim);
|
||||
splitValue,
|
||||
0,
|
||||
maxSplitPackedValue,
|
||||
splitDim * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
|
||||
// Recurse on left tree:
|
||||
build(
|
||||
|
@ -1289,30 +1300,30 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
}
|
||||
|
||||
private void computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix) {
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim);
|
||||
Arrays.fill(commonPrefixLengths, config.bytesPerDim());
|
||||
PointValue value = heapPointWriter.getPackedValueSlice(0);
|
||||
BytesRef packedValue = value.packedValue();
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
System.arraycopy(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + dim * config.bytesPerDim,
|
||||
packedValue.offset + dim * config.bytesPerDim(),
|
||||
commonPrefix,
|
||||
dim * config.bytesPerDim,
|
||||
config.bytesPerDim);
|
||||
dim * config.bytesPerDim(),
|
||||
config.bytesPerDim());
|
||||
}
|
||||
for (int i = 1; i < heapPointWriter.count(); i++) {
|
||||
value = heapPointWriter.getPackedValueSlice(i);
|
||||
packedValue = value.packedValue();
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
if (commonPrefixLengths[dim] != 0) {
|
||||
int j =
|
||||
Arrays.mismatch(
|
||||
commonPrefix,
|
||||
dim * config.bytesPerDim,
|
||||
dim * config.bytesPerDim + commonPrefixLengths[dim],
|
||||
dim * config.bytesPerDim(),
|
||||
dim * config.bytesPerDim() + commonPrefixLengths[dim],
|
||||
packedValue.bytes,
|
||||
packedValue.offset + dim * config.bytesPerDim,
|
||||
packedValue.offset + dim * config.bytesPerDim + commonPrefixLengths[dim]);
|
||||
packedValue.offset + dim * config.bytesPerDim(),
|
||||
packedValue.offset + dim * config.bytesPerDim() + commonPrefixLengths[dim]);
|
||||
if (j != -1) {
|
||||
commonPrefixLengths[dim] = j;
|
||||
}
|
||||
|
@ -1331,11 +1342,11 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
int[] docs,
|
||||
int docsOffset)
|
||||
throws IOException {
|
||||
byte[] lastPackedValue = new byte[config.packedBytesLength];
|
||||
byte[] lastPackedValue = new byte[config.packedBytesLength()];
|
||||
int lastDoc = -1;
|
||||
for (int i = 0; i < count; i++) {
|
||||
BytesRef packedValue = values.apply(i);
|
||||
assert packedValue.length == config.packedBytesLength;
|
||||
assert packedValue.length == config.packedBytesLength();
|
||||
assert valueInOrder(
|
||||
i,
|
||||
sortedDim,
|
||||
|
@ -1361,43 +1372,43 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
int packedValueOffset,
|
||||
int doc,
|
||||
int lastDoc) {
|
||||
int dimOffset = sortedDim * config.bytesPerDim;
|
||||
int dimOffset = sortedDim * config.bytesPerDim();
|
||||
if (ord > 0) {
|
||||
int cmp =
|
||||
Arrays.compareUnsigned(
|
||||
lastPackedValue,
|
||||
dimOffset,
|
||||
dimOffset + config.bytesPerDim,
|
||||
dimOffset + config.bytesPerDim(),
|
||||
packedValue,
|
||||
packedValueOffset + dimOffset,
|
||||
packedValueOffset + dimOffset + config.bytesPerDim);
|
||||
packedValueOffset + dimOffset + config.bytesPerDim());
|
||||
if (cmp > 0) {
|
||||
throw new AssertionError(
|
||||
"values out of order: last value="
|
||||
+ new BytesRef(lastPackedValue)
|
||||
+ " current value="
|
||||
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength)
|
||||
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength())
|
||||
+ " ord="
|
||||
+ ord
|
||||
+ " sortedDim="
|
||||
+ sortedDim);
|
||||
}
|
||||
if (cmp == 0 && config.numDims > config.numIndexDims) {
|
||||
int dataOffset = config.numIndexDims * config.bytesPerDim;
|
||||
if (cmp == 0 && config.numDims() > config.numIndexDims()) {
|
||||
int dataOffset = config.numIndexDims() * config.bytesPerDim();
|
||||
cmp =
|
||||
Arrays.compareUnsigned(
|
||||
lastPackedValue,
|
||||
dataOffset,
|
||||
config.packedBytesLength,
|
||||
config.packedBytesLength(),
|
||||
packedValue,
|
||||
packedValueOffset + dataOffset,
|
||||
packedValueOffset + config.packedBytesLength);
|
||||
packedValueOffset + config.packedBytesLength());
|
||||
if (cmp > 0) {
|
||||
throw new AssertionError(
|
||||
"data values out of order: last value="
|
||||
+ new BytesRef(lastPackedValue)
|
||||
+ " current value="
|
||||
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength)
|
||||
+ new BytesRef(packedValue, packedValueOffset, config.packedBytesLength())
|
||||
+ " ord="
|
||||
+ ord);
|
||||
}
|
||||
|
@ -1414,7 +1425,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
|||
+ sortedDim);
|
||||
}
|
||||
}
|
||||
System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, config.packedBytesLength);
|
||||
System.arraycopy(
|
||||
packedValue, packedValueOffset, lastPackedValue, 0, config.packedBytesLength());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -19,8 +19,15 @@ package org.apache.lucene.util.bkd;
|
|||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
/** Basic parameters for indexing points on the BKD tree. */
|
||||
public final class BKDConfig {
|
||||
/**
|
||||
* Basic parameters for indexing points on the BKD tree.
|
||||
*
|
||||
* @param numDims How many dimensions we are storing at the leaf (data) node
|
||||
* @param numIndexDims How many dimensions we are indexing in the internal nodes
|
||||
* @param bytesPerDim How many bytes each value in each dimension takes.
|
||||
* @param maxPointsInLeafNode max points allowed on a Leaf block
|
||||
*/
|
||||
public record BKDConfig(int numDims, int numIndexDims, int bytesPerDim, int maxPointsInLeafNode) {
|
||||
|
||||
/** Default maximum number of point in each leaf block */
|
||||
public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 512;
|
||||
|
@ -31,48 +38,7 @@ public final class BKDConfig {
|
|||
/** Maximum number of index dimensions */
|
||||
public static final int MAX_INDEX_DIMS = 8;
|
||||
|
||||
/** How many dimensions we are storing at the leaf (data) nodes */
|
||||
public final int numDims;
|
||||
|
||||
/** How many dimensions we are indexing in the internal nodes */
|
||||
public final int numIndexDims;
|
||||
|
||||
/** How many bytes each value in each dimension takes. */
|
||||
public final int bytesPerDim;
|
||||
|
||||
/** max points allowed on a Leaf block */
|
||||
public final int maxPointsInLeafNode;
|
||||
|
||||
/** numDataDims * bytesPerDim */
|
||||
public final int packedBytesLength;
|
||||
|
||||
/** numIndexDims * bytesPerDim */
|
||||
public final int packedIndexBytesLength;
|
||||
|
||||
/** packedBytesLength plus docID size */
|
||||
public final int bytesPerDoc;
|
||||
|
||||
public BKDConfig(
|
||||
final int numDims,
|
||||
final int numIndexDims,
|
||||
final int bytesPerDim,
|
||||
final int maxPointsInLeafNode) {
|
||||
verifyParams(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
|
||||
this.numDims = numDims;
|
||||
this.numIndexDims = numIndexDims;
|
||||
this.bytesPerDim = bytesPerDim;
|
||||
this.maxPointsInLeafNode = maxPointsInLeafNode;
|
||||
this.packedIndexBytesLength = numIndexDims * bytesPerDim;
|
||||
this.packedBytesLength = numDims * bytesPerDim;
|
||||
// dimensional values (numDims * bytesPerDim) + docID (int)
|
||||
this.bytesPerDoc = this.packedBytesLength + Integer.BYTES;
|
||||
}
|
||||
|
||||
private static void verifyParams(
|
||||
final int numDims,
|
||||
final int numIndexDims,
|
||||
final int bytesPerDim,
|
||||
final int maxPointsInLeafNode) {
|
||||
public BKDConfig {
|
||||
// Check inputs are on bounds
|
||||
if (numDims < 1 || numDims > MAX_DIMS) {
|
||||
throw new IllegalArgumentException(
|
||||
|
@ -101,4 +67,19 @@ public final class BKDConfig {
|
|||
+ maxPointsInLeafNode);
|
||||
}
|
||||
}
|
||||
|
||||
/** numDims * bytesPerDim */
|
||||
public int packedBytesLength() {
|
||||
return numDims * bytesPerDim;
|
||||
}
|
||||
|
||||
/** numIndexDims * bytesPerDim */
|
||||
public int packedIndexBytesLength() {
|
||||
return numIndexDims * bytesPerDim;
|
||||
}
|
||||
|
||||
/** (numDims * bytesPerDim) + Integer.BYTES (packedBytesLength plus docID size) */
|
||||
public int bytesPerDoc() {
|
||||
return packedBytesLength() + Integer.BYTES;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -39,7 +39,7 @@ public final class BKDRadixSelector {
|
|||
private static final int MAX_SIZE_OFFLINE_BUFFER = 1024 * 8;
|
||||
// histogram array
|
||||
private final long[] histogram;
|
||||
// number of bytes to be sorted: config.bytesPerDim + Integer.BYTES
|
||||
// number of bytes to be sorted: config.bytesPerDim() + Integer.BYTES
|
||||
private final int bytesSorted;
|
||||
// flag to when we are moving to sort on heap
|
||||
private final int maxPointsSortInHeap;
|
||||
|
@ -69,11 +69,11 @@ public final class BKDRadixSelector {
|
|||
// equal
|
||||
// we tie-break on the docID. Here we account for all bytes used in the process.
|
||||
this.bytesSorted =
|
||||
config.bytesPerDim
|
||||
+ (config.numDims - config.numIndexDims) * config.bytesPerDim
|
||||
config.bytesPerDim()
|
||||
+ (config.numDims() - config.numIndexDims()) * config.bytesPerDim()
|
||||
+ Integer.BYTES;
|
||||
final int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / config.bytesPerDoc;
|
||||
this.offlineBuffer = new byte[numberOfPointsOffline * config.bytesPerDoc];
|
||||
final int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / config.bytesPerDoc();
|
||||
this.offlineBuffer = new byte[numberOfPointsOffline * config.bytesPerDoc()];
|
||||
this.partitionBucket = new int[bytesSorted];
|
||||
this.histogram = new long[HISTOGRAM_SIZE];
|
||||
this.scratch = new byte[bytesSorted];
|
||||
|
@ -147,7 +147,7 @@ public final class BKDRadixSelector {
|
|||
throws IOException {
|
||||
// find common prefix
|
||||
int commonPrefixPosition = bytesSorted;
|
||||
final int offset = dim * config.bytesPerDim;
|
||||
final int offset = dim * config.bytesPerDim();
|
||||
try (OfflinePointReader reader = points.getReader(from, to - from, offlineBuffer)) {
|
||||
assert commonPrefixPosition > dimCommonPrefix;
|
||||
reader.next();
|
||||
|
@ -155,14 +155,18 @@ public final class BKDRadixSelector {
|
|||
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||
// copy dimension
|
||||
System.arraycopy(
|
||||
packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, config.bytesPerDim);
|
||||
packedValueDocID.bytes,
|
||||
packedValueDocID.offset + offset,
|
||||
scratch,
|
||||
0,
|
||||
config.bytesPerDim());
|
||||
// copy data dimensions and docID
|
||||
System.arraycopy(
|
||||
packedValueDocID.bytes,
|
||||
packedValueDocID.offset + config.packedIndexBytesLength,
|
||||
packedValueDocID.offset + config.packedIndexBytesLength(),
|
||||
scratch,
|
||||
config.bytesPerDim,
|
||||
(config.numDims - config.numIndexDims) * config.bytesPerDim + Integer.BYTES);
|
||||
config.bytesPerDim(),
|
||||
(config.numDims() - config.numIndexDims()) * config.bytesPerDim() + Integer.BYTES);
|
||||
|
||||
for (long i = from + 1; i < to; i++) {
|
||||
reader.next();
|
||||
|
@ -179,8 +183,8 @@ public final class BKDRadixSelector {
|
|||
break;
|
||||
} else {
|
||||
// Check common prefix and adjust histogram
|
||||
final int startIndex = Math.min(dimCommonPrefix, config.bytesPerDim);
|
||||
final int endIndex = Math.min(commonPrefixPosition, config.bytesPerDim);
|
||||
final int startIndex = Math.min(dimCommonPrefix, config.bytesPerDim());
|
||||
final int endIndex = Math.min(commonPrefixPosition, config.bytesPerDim());
|
||||
packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||
int j =
|
||||
Arrays.mismatch(
|
||||
|
@ -191,20 +195,20 @@ public final class BKDRadixSelector {
|
|||
packedValueDocID.offset + offset + startIndex,
|
||||
packedValueDocID.offset + offset + endIndex);
|
||||
if (j == -1) {
|
||||
if (commonPrefixPosition > config.bytesPerDim) {
|
||||
if (commonPrefixPosition > config.bytesPerDim()) {
|
||||
// Tie-break on data dimensions + docID
|
||||
final int startTieBreak = config.packedIndexBytesLength;
|
||||
final int endTieBreak = startTieBreak + commonPrefixPosition - config.bytesPerDim;
|
||||
final int startTieBreak = config.packedIndexBytesLength();
|
||||
final int endTieBreak = startTieBreak + commonPrefixPosition - config.bytesPerDim();
|
||||
int k =
|
||||
Arrays.mismatch(
|
||||
scratch,
|
||||
config.bytesPerDim,
|
||||
config.bytesPerDim(),
|
||||
commonPrefixPosition,
|
||||
packedValueDocID.bytes,
|
||||
packedValueDocID.offset + startTieBreak,
|
||||
packedValueDocID.offset + endTieBreak);
|
||||
if (k != -1) {
|
||||
commonPrefixPosition = config.bytesPerDim + k;
|
||||
commonPrefixPosition = config.bytesPerDim() + k;
|
||||
Arrays.fill(histogram, 0);
|
||||
histogram[scratch[commonPrefixPosition] & 0xff] = i - from;
|
||||
}
|
||||
|
@ -230,7 +234,7 @@ public final class BKDRadixSelector {
|
|||
|
||||
private int getBucket(int offset, int commonPrefixPosition, PointValue pointValue) {
|
||||
int bucket;
|
||||
if (commonPrefixPosition < config.bytesPerDim) {
|
||||
if (commonPrefixPosition < config.bytesPerDim()) {
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff;
|
||||
} else {
|
||||
|
@ -239,9 +243,9 @@ public final class BKDRadixSelector {
|
|||
packedValueDocID
|
||||
.bytes[
|
||||
packedValueDocID.offset
|
||||
+ config.packedIndexBytesLength
|
||||
+ config.packedIndexBytesLength()
|
||||
+ commonPrefixPosition
|
||||
- config.bytesPerDim]
|
||||
- config.bytesPerDim()]
|
||||
& 0xff;
|
||||
}
|
||||
return bucket;
|
||||
|
@ -341,7 +345,7 @@ public final class BKDRadixSelector {
|
|||
long numDocsTiebreak)
|
||||
throws IOException {
|
||||
assert bytePosition == bytesSorted - 1 || deltaPoints != null;
|
||||
int offset = dim * config.bytesPerDim;
|
||||
int offset = dim * config.bytesPerDim();
|
||||
long tiebreakCounter = 0;
|
||||
try (OfflinePointReader reader = points.getReader(from, to - from, offlineBuffer)) {
|
||||
while (reader.next()) {
|
||||
|
@ -372,8 +376,8 @@ public final class BKDRadixSelector {
|
|||
}
|
||||
|
||||
private byte[] partitionPointFromCommonPrefix() {
|
||||
byte[] partition = new byte[config.bytesPerDim];
|
||||
for (int i = 0; i < config.bytesPerDim; i++) {
|
||||
byte[] partition = new byte[config.bytesPerDim()];
|
||||
for (int i = 0; i < config.bytesPerDim(); i++) {
|
||||
partition[i] = (byte) partitionBucket[i];
|
||||
}
|
||||
return partition;
|
||||
|
@ -408,9 +412,9 @@ public final class BKDRadixSelector {
|
|||
int to,
|
||||
int partitionPoint,
|
||||
int commonPrefixLength) {
|
||||
final int dimOffset = dim * config.bytesPerDim + commonPrefixLength;
|
||||
final int dimCmpBytes = config.bytesPerDim - commonPrefixLength;
|
||||
final int dataOffset = config.packedIndexBytesLength - dimCmpBytes;
|
||||
final int dimOffset = dim * config.bytesPerDim() + commonPrefixLength;
|
||||
final int dimCmpBytes = config.bytesPerDim() - commonPrefixLength;
|
||||
final int dataOffset = config.packedIndexBytesLength() - dimCmpBytes;
|
||||
new RadixSelector(bytesSorted - commonPrefixLength) {
|
||||
|
||||
@Override
|
||||
|
@ -427,7 +431,7 @@ public final class BKDRadixSelector {
|
|||
@Override
|
||||
protected Selector getFallbackSelector(int d) {
|
||||
final int skypedBytes = d + commonPrefixLength;
|
||||
final int dimStart = dim * config.bytesPerDim;
|
||||
final int dimStart = dim * config.bytesPerDim();
|
||||
return new IntroSelector() {
|
||||
|
||||
@Override
|
||||
|
@ -437,15 +441,15 @@ public final class BKDRadixSelector {
|
|||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
if (skypedBytes < config.bytesPerDim) {
|
||||
if (skypedBytes < config.bytesPerDim()) {
|
||||
points.copyDim(i, dimStart, scratch, 0);
|
||||
}
|
||||
points.copyDataDimsAndDoc(i, scratch, config.bytesPerDim);
|
||||
points.copyDataDimsAndDoc(i, scratch, config.bytesPerDim());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
if (skypedBytes < config.bytesPerDim) {
|
||||
if (skypedBytes < config.bytesPerDim()) {
|
||||
int cmp = points.compareDim(i, j, dimStart);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
|
@ -456,36 +460,36 @@ public final class BKDRadixSelector {
|
|||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
if (skypedBytes < config.bytesPerDim) {
|
||||
if (skypedBytes < config.bytesPerDim()) {
|
||||
int cmp = points.compareDim(j, scratch, 0, dimStart);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return points.compareDataDimsAndDoc(j, scratch, config.bytesPerDim);
|
||||
return points.compareDataDimsAndDoc(j, scratch, config.bytesPerDim());
|
||||
}
|
||||
};
|
||||
}
|
||||
}.select(from, to, partitionPoint);
|
||||
|
||||
byte[] partition = new byte[config.bytesPerDim];
|
||||
byte[] partition = new byte[config.bytesPerDim()];
|
||||
PointValue pointValue = points.getPackedValueSlice(partitionPoint);
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
System.arraycopy(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + dim * config.bytesPerDim,
|
||||
packedValue.offset + dim * config.bytesPerDim(),
|
||||
partition,
|
||||
0,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
return partition;
|
||||
}
|
||||
|
||||
/** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */
|
||||
public void heapRadixSort(
|
||||
final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) {
|
||||
final int dimOffset = dim * config.bytesPerDim + commonPrefixLength;
|
||||
final int dimCmpBytes = config.bytesPerDim - commonPrefixLength;
|
||||
final int dataOffset = config.packedIndexBytesLength - dimCmpBytes;
|
||||
final int dimOffset = dim * config.bytesPerDim() + commonPrefixLength;
|
||||
final int dimCmpBytes = config.bytesPerDim() - commonPrefixLength;
|
||||
final int dataOffset = config.packedIndexBytesLength() - dimCmpBytes;
|
||||
new MSBRadixSorter(bytesSorted - commonPrefixLength) {
|
||||
|
||||
@Override
|
||||
|
@ -502,7 +506,7 @@ public final class BKDRadixSelector {
|
|||
@Override
|
||||
protected Sorter getFallbackSorter(int k) {
|
||||
final int skypedBytes = k + commonPrefixLength;
|
||||
final int dimStart = dim * config.bytesPerDim;
|
||||
final int dimStart = dim * config.bytesPerDim();
|
||||
return new IntroSorter() {
|
||||
|
||||
@Override
|
||||
|
@ -512,15 +516,15 @@ public final class BKDRadixSelector {
|
|||
|
||||
@Override
|
||||
protected void setPivot(int i) {
|
||||
if (skypedBytes < config.bytesPerDim) {
|
||||
if (skypedBytes < config.bytesPerDim()) {
|
||||
points.copyDim(i, dimStart, scratch, 0);
|
||||
}
|
||||
points.copyDataDimsAndDoc(i, scratch, config.bytesPerDim);
|
||||
points.copyDataDimsAndDoc(i, scratch, config.bytesPerDim());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
if (skypedBytes < config.bytesPerDim) {
|
||||
if (skypedBytes < config.bytesPerDim()) {
|
||||
final int cmp = points.compareDim(i, j, dimStart);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
|
@ -531,13 +535,13 @@ public final class BKDRadixSelector {
|
|||
|
||||
@Override
|
||||
protected int comparePivot(int j) {
|
||||
if (skypedBytes < config.bytesPerDim) {
|
||||
if (skypedBytes < config.bytesPerDim()) {
|
||||
int cmp = points.compareDim(j, scratch, 0, dimStart);
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
}
|
||||
return points.compareDataDimsAndDoc(j, scratch, config.bytesPerDim);
|
||||
return points.compareDataDimsAndDoc(j, scratch, config.bytesPerDim());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -72,16 +72,19 @@ public class BKDReader extends PointValues {
|
|||
numLeaves = metaIn.readVInt();
|
||||
assert numLeaves > 0;
|
||||
|
||||
minPackedValue = new byte[config.packedIndexBytesLength];
|
||||
maxPackedValue = new byte[config.packedIndexBytesLength];
|
||||
minPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
maxPackedValue = new byte[config.packedIndexBytesLength()];
|
||||
|
||||
metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength);
|
||||
metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength);
|
||||
metaIn.readBytes(minPackedValue, 0, config.packedIndexBytesLength());
|
||||
metaIn.readBytes(maxPackedValue, 0, config.packedIndexBytesLength());
|
||||
final ArrayUtil.ByteArrayComparator comparator =
|
||||
ArrayUtil.getUnsignedComparator(config.bytesPerDim);
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
ArrayUtil.getUnsignedComparator(config.bytesPerDim());
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
if (comparator.compare(
|
||||
minPackedValue, dim * config.bytesPerDim, maxPackedValue, dim * config.bytesPerDim)
|
||||
minPackedValue,
|
||||
dim * config.bytesPerDim(),
|
||||
maxPackedValue,
|
||||
dim * config.bytesPerDim())
|
||||
> 0) {
|
||||
throw new CorruptIndexException(
|
||||
"minPackedValue "
|
||||
|
@ -118,7 +121,7 @@ public class BKDReader extends PointValues {
|
|||
// since lucene 8.6 all trees are unbalanced.
|
||||
return false;
|
||||
}
|
||||
if (config.numDims > 1) {
|
||||
if (config.numDims() > 1) {
|
||||
// high dimensional tree in pre-8.6 indices are balanced.
|
||||
assert 1 << MathUtil.log(numLeaves, 2) == numLeaves;
|
||||
return true;
|
||||
|
@ -128,7 +131,7 @@ public class BKDReader extends PointValues {
|
|||
return false;
|
||||
}
|
||||
// count of the last node for unbalanced trees
|
||||
final int lastLeafNodePointCount = Math.toIntExact(pointCount % config.maxPointsInLeafNode);
|
||||
final int lastLeafNodePointCount = Math.toIntExact(pointCount % config.maxPointsInLeafNode());
|
||||
// navigate to last node
|
||||
PointTree pointTree = getPointTree();
|
||||
do {
|
||||
|
@ -244,11 +247,11 @@ public class BKDReader extends PointValues {
|
|||
1,
|
||||
minPackedValue,
|
||||
maxPackedValue,
|
||||
new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode),
|
||||
new byte[config.packedBytesLength],
|
||||
new byte[config.packedIndexBytesLength],
|
||||
new byte[config.packedIndexBytesLength],
|
||||
new int[config.numDims],
|
||||
new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode()),
|
||||
new byte[config.packedBytesLength()],
|
||||
new byte[config.packedIndexBytesLength()],
|
||||
new byte[config.packedIndexBytesLength()],
|
||||
new int[config.numDims()],
|
||||
isTreeBalanced);
|
||||
// read root node
|
||||
readNodeData(false);
|
||||
|
@ -286,18 +289,18 @@ public class BKDReader extends PointValues {
|
|||
int treeDepth = getTreeDepth(numLeaves);
|
||||
splitDimValueStack = new byte[treeDepth][];
|
||||
splitValuesStack = new byte[treeDepth][];
|
||||
splitValuesStack[0] = new byte[config.packedIndexBytesLength];
|
||||
splitValuesStack[0] = new byte[config.packedIndexBytesLength()];
|
||||
leafBlockFPStack = new long[treeDepth + 1];
|
||||
readNodeDataPositions = new int[treeDepth + 1];
|
||||
rightNodePositions = new int[treeDepth];
|
||||
splitDimsPos = new int[treeDepth];
|
||||
negativeDeltas = new boolean[config.numIndexDims * treeDepth];
|
||||
negativeDeltas = new boolean[config.numIndexDims() * treeDepth];
|
||||
// information about the unbalance of the tree so we can report the exact size below a node
|
||||
this.pointCount = pointCount;
|
||||
rightMostLeafNode = (1 << treeDepth - 1) - 1;
|
||||
int lastLeafNodePointCount = Math.toIntExact(pointCount % config.maxPointsInLeafNode);
|
||||
int lastLeafNodePointCount = Math.toIntExact(pointCount % config.maxPointsInLeafNode());
|
||||
this.lastLeafNodePointCount =
|
||||
lastLeafNodePointCount == 0 ? config.maxPointsInLeafNode : lastLeafNodePointCount;
|
||||
lastLeafNodePointCount == 0 ? config.maxPointsInLeafNode() : lastLeafNodePointCount;
|
||||
// scratch objects, reused between clones so NN search are not creating those objects
|
||||
// in every clone.
|
||||
this.scratchIterator = scratchIterator;
|
||||
|
@ -336,10 +339,10 @@ public class BKDReader extends PointValues {
|
|||
index.splitValuesStack[index.level] = splitValuesStack[level].clone();
|
||||
System.arraycopy(
|
||||
negativeDeltas,
|
||||
level * config.numIndexDims,
|
||||
level * config.numIndexDims(),
|
||||
index.negativeDeltas,
|
||||
level * config.numIndexDims,
|
||||
config.numIndexDims);
|
||||
level * config.numIndexDims(),
|
||||
config.numIndexDims());
|
||||
index.splitDimsPos[level] = splitDimsPos[level];
|
||||
}
|
||||
return index;
|
||||
|
@ -375,25 +378,25 @@ public class BKDReader extends PointValues {
|
|||
private void pushBoundsLeft() {
|
||||
final int splitDimPos = splitDimsPos[level];
|
||||
if (splitDimValueStack[level] == null) {
|
||||
splitDimValueStack[level] = new byte[config.bytesPerDim];
|
||||
splitDimValueStack[level] = new byte[config.bytesPerDim()];
|
||||
}
|
||||
// save the dimension we are going to change
|
||||
System.arraycopy(
|
||||
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
|
||||
assert ArrayUtil.getUnsignedComparator(config.bytesPerDim)
|
||||
maxPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
|
||||
assert ArrayUtil.getUnsignedComparator(config.bytesPerDim())
|
||||
.compare(maxPackedValue, splitDimPos, splitValuesStack[level], splitDimPos)
|
||||
>= 0
|
||||
: "config.bytesPerDim="
|
||||
+ config.bytesPerDim
|
||||
: "config.bytesPerDim()="
|
||||
+ config.bytesPerDim()
|
||||
+ " splitDimPos="
|
||||
+ splitDimsPos[level]
|
||||
+ " config.numIndexDims="
|
||||
+ config.numIndexDims
|
||||
+ " config.numDims="
|
||||
+ config.numDims;
|
||||
+ " config.numIndexDims()="
|
||||
+ config.numIndexDims()
|
||||
+ " config.numDims()="
|
||||
+ config.numDims();
|
||||
// add the split dim value:
|
||||
System.arraycopy(
|
||||
splitValuesStack[level], splitDimPos, maxPackedValue, splitDimPos, config.bytesPerDim);
|
||||
splitValuesStack[level], splitDimPos, maxPackedValue, splitDimPos, config.bytesPerDim());
|
||||
}
|
||||
|
||||
private void pushLeft() throws IOException {
|
||||
|
@ -408,21 +411,21 @@ public class BKDReader extends PointValues {
|
|||
assert splitDimValueStack[level] != null;
|
||||
// save the dimension we are going to change
|
||||
System.arraycopy(
|
||||
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim);
|
||||
assert ArrayUtil.getUnsignedComparator(config.bytesPerDim)
|
||||
minPackedValue, splitDimPos, splitDimValueStack[level], 0, config.bytesPerDim());
|
||||
assert ArrayUtil.getUnsignedComparator(config.bytesPerDim())
|
||||
.compare(minPackedValue, splitDimPos, splitValuesStack[level], splitDimPos)
|
||||
<= 0
|
||||
: "config.bytesPerDim="
|
||||
+ config.bytesPerDim
|
||||
: "config.bytesPerDim()="
|
||||
+ config.bytesPerDim()
|
||||
+ " splitDimPos="
|
||||
+ splitDimsPos[level]
|
||||
+ " config.numIndexDims="
|
||||
+ config.numIndexDims
|
||||
+ " config.numDims="
|
||||
+ config.numDims;
|
||||
+ " config.numIndexDims()="
|
||||
+ config.numIndexDims()
|
||||
+ " config.numDims()="
|
||||
+ config.numDims();
|
||||
// add the split dim value:
|
||||
System.arraycopy(
|
||||
splitValuesStack[level], splitDimPos, minPackedValue, splitDimPos, config.bytesPerDim);
|
||||
splitValuesStack[level], splitDimPos, minPackedValue, splitDimPos, config.bytesPerDim());
|
||||
}
|
||||
|
||||
private void pushRight() throws IOException {
|
||||
|
@ -456,7 +459,7 @@ public class BKDReader extends PointValues {
|
|||
private void popBounds(byte[] packedValue) {
|
||||
// restore the split dimension
|
||||
System.arraycopy(
|
||||
splitDimValueStack[level], 0, packedValue, splitDimsPos[level], config.bytesPerDim);
|
||||
splitDimValueStack[level], 0, packedValue, splitDimsPos[level], config.bytesPerDim());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -517,14 +520,14 @@ public class BKDReader extends PointValues {
|
|||
}
|
||||
// size for an unbalanced tree.
|
||||
return rightMostLeafNode == this.rightMostLeafNode
|
||||
? (long) (numLeaves - 1) * config.maxPointsInLeafNode + lastLeafNodePointCount
|
||||
: (long) numLeaves * config.maxPointsInLeafNode;
|
||||
? (long) (numLeaves - 1) * config.maxPointsInLeafNode() + lastLeafNodePointCount
|
||||
: (long) numLeaves * config.maxPointsInLeafNode();
|
||||
}
|
||||
|
||||
private long sizeFromBalancedTree(int leftMostLeafNode, int rightMostLeafNode) {
|
||||
// number of points that need to be distributed between leaves, one per leaf
|
||||
final int extraPoints =
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode * this.leafNodeOffset) - pointCount);
|
||||
Math.toIntExact(((long) config.maxPointsInLeafNode() * this.leafNodeOffset) - pointCount);
|
||||
assert extraPoints < leafNodeOffset : "point excess should be lower than leafNodeOffset";
|
||||
// offset where we stop adding one point to the leaves
|
||||
final int nodeOffset = leafNodeOffset - extraPoints;
|
||||
|
@ -532,9 +535,9 @@ public class BKDReader extends PointValues {
|
|||
for (int node = leftMostLeafNode; node <= rightMostLeafNode; node++) {
|
||||
// offsetPosition provides which extra point will be added to this node
|
||||
if (balanceTreeNodePosition(0, leafNodeOffset, node - leafNodeOffset, 0, 0) < nodeOffset) {
|
||||
count += config.maxPointsInLeafNode;
|
||||
count += config.maxPointsInLeafNode();
|
||||
} else {
|
||||
count += config.maxPointsInLeafNode - 1;
|
||||
count += config.maxPointsInLeafNode() - 1;
|
||||
}
|
||||
}
|
||||
return count;
|
||||
|
@ -664,12 +667,12 @@ public class BKDReader extends PointValues {
|
|||
if (isLeafNode() == false) {
|
||||
System.arraycopy(
|
||||
negativeDeltas,
|
||||
(level - 1) * config.numIndexDims,
|
||||
(level - 1) * config.numIndexDims(),
|
||||
negativeDeltas,
|
||||
level * config.numIndexDims,
|
||||
config.numIndexDims);
|
||||
level * config.numIndexDims(),
|
||||
config.numIndexDims());
|
||||
negativeDeltas[
|
||||
level * config.numIndexDims + (splitDimsPos[level - 1] / config.bytesPerDim)] =
|
||||
level * config.numIndexDims() + (splitDimsPos[level - 1] / config.bytesPerDim())] =
|
||||
isLeft;
|
||||
|
||||
if (splitValuesStack[level] == null) {
|
||||
|
@ -680,20 +683,20 @@ public class BKDReader extends PointValues {
|
|||
0,
|
||||
splitValuesStack[level],
|
||||
0,
|
||||
config.packedIndexBytesLength);
|
||||
config.packedIndexBytesLength());
|
||||
}
|
||||
|
||||
// read split dim, prefix, firstDiffByteDelta encoded as int:
|
||||
int code = innerNodes.readVInt();
|
||||
final int splitDim = code % config.numIndexDims;
|
||||
splitDimsPos[level] = splitDim * config.bytesPerDim;
|
||||
code /= config.numIndexDims;
|
||||
final int prefix = code % (1 + config.bytesPerDim);
|
||||
final int suffix = config.bytesPerDim - prefix;
|
||||
final int splitDim = code % config.numIndexDims();
|
||||
splitDimsPos[level] = splitDim * config.bytesPerDim();
|
||||
code /= config.numIndexDims();
|
||||
final int prefix = code % (1 + config.bytesPerDim());
|
||||
final int suffix = config.bytesPerDim() - prefix;
|
||||
|
||||
if (suffix > 0) {
|
||||
int firstDiffByteDelta = code / (1 + config.bytesPerDim);
|
||||
if (negativeDeltas[level * config.numIndexDims + splitDim]) {
|
||||
int firstDiffByteDelta = code / (1 + config.bytesPerDim());
|
||||
if (negativeDeltas[level * config.numIndexDims() + splitDim]) {
|
||||
firstDiffByteDelta = -firstDiffByteDelta;
|
||||
}
|
||||
final int startPos = splitDimsPos[level] + prefix;
|
||||
|
@ -737,13 +740,13 @@ public class BKDReader extends PointValues {
|
|||
PointValues.IntersectVisitor visitor)
|
||||
throws IOException {
|
||||
readCommonPrefixes(commonPrefixLengths, scratchDataPackedValue, in);
|
||||
if (config.numIndexDims != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
|
||||
if (config.numIndexDims() != 1 && version >= BKDWriter.VERSION_LEAF_STORES_BOUNDS) {
|
||||
byte[] minPackedValue = scratchMinIndexPackedValue;
|
||||
System.arraycopy(
|
||||
scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
|
||||
scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength());
|
||||
byte[] maxPackedValue = scratchMaxIndexPackedValue;
|
||||
// Copy common prefixes before reading adjusted box
|
||||
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength());
|
||||
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
|
||||
|
||||
// The index gives us range of values for each dimension, but the actual range of values
|
||||
|
@ -801,13 +804,13 @@ public class BKDReader extends PointValues {
|
|||
visitor.grow(count);
|
||||
visitUniqueRawDocValues(scratchDataPackedValue, scratchIterator, count, visitor);
|
||||
} else {
|
||||
if (config.numIndexDims != 1) {
|
||||
if (config.numIndexDims() != 1) {
|
||||
byte[] minPackedValue = scratchMinIndexPackedValue;
|
||||
System.arraycopy(
|
||||
scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength);
|
||||
scratchDataPackedValue, 0, minPackedValue, 0, config.packedIndexBytesLength());
|
||||
byte[] maxPackedValue = scratchMaxIndexPackedValue;
|
||||
// Copy common prefixes before reading adjusted box
|
||||
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength);
|
||||
System.arraycopy(minPackedValue, 0, maxPackedValue, 0, config.packedIndexBytesLength());
|
||||
readMinMax(commonPrefixLengths, minPackedValue, maxPackedValue, in);
|
||||
|
||||
// The index gives us range of values for each dimension, but the actual range of values
|
||||
|
@ -853,12 +856,12 @@ public class BKDReader extends PointValues {
|
|||
private void readMinMax(
|
||||
int[] commonPrefixLengths, byte[] minPackedValue, byte[] maxPackedValue, IndexInput in)
|
||||
throws IOException {
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
int prefix = commonPrefixLengths[dim];
|
||||
in.readBytes(
|
||||
minPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
|
||||
minPackedValue, dim * config.bytesPerDim() + prefix, config.bytesPerDim() - prefix);
|
||||
in.readBytes(
|
||||
maxPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
|
||||
maxPackedValue, dim * config.bytesPerDim() + prefix, config.bytesPerDim() - prefix);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -874,10 +877,12 @@ public class BKDReader extends PointValues {
|
|||
int i;
|
||||
for (i = 0; i < count; ) {
|
||||
int length = in.readVInt();
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
int prefix = commonPrefixLengths[dim];
|
||||
in.readBytes(
|
||||
scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
|
||||
scratchPackedValue,
|
||||
dim * config.bytesPerDim() + prefix,
|
||||
config.bytesPerDim() - prefix);
|
||||
}
|
||||
scratchIterator.reset(i, length);
|
||||
visitor.visit(scratchIterator, scratchPackedValue);
|
||||
|
@ -912,17 +917,19 @@ public class BKDReader extends PointValues {
|
|||
// the byte at `compressedByteOffset` is compressed using run-length compression,
|
||||
// other suffix bytes are stored verbatim
|
||||
final int compressedByteOffset =
|
||||
compressedDim * config.bytesPerDim + commonPrefixLengths[compressedDim];
|
||||
compressedDim * config.bytesPerDim() + commonPrefixLengths[compressedDim];
|
||||
commonPrefixLengths[compressedDim]++;
|
||||
int i;
|
||||
for (i = 0; i < count; ) {
|
||||
scratchPackedValue[compressedByteOffset] = in.readByte();
|
||||
final int runLen = Byte.toUnsignedInt(in.readByte());
|
||||
for (int j = 0; j < runLen; ++j) {
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
int prefix = commonPrefixLengths[dim];
|
||||
in.readBytes(
|
||||
scratchPackedValue, dim * config.bytesPerDim + prefix, config.bytesPerDim - prefix);
|
||||
scratchPackedValue,
|
||||
dim * config.bytesPerDim() + prefix,
|
||||
config.bytesPerDim() - prefix);
|
||||
}
|
||||
visitor.visit(scratchIterator.docIDs[i + j], scratchPackedValue);
|
||||
}
|
||||
|
@ -937,7 +944,7 @@ public class BKDReader extends PointValues {
|
|||
private int readCompressedDim(IndexInput in) throws IOException {
|
||||
int compressedDim = in.readByte();
|
||||
if (compressedDim < -2
|
||||
|| compressedDim >= config.numDims
|
||||
|| compressedDim >= config.numDims()
|
||||
|| (version < BKDWriter.VERSION_LOW_CARDINALITY_LEAVES && compressedDim == -2)) {
|
||||
throw new CorruptIndexException("Got compressedDim=" + compressedDim, in);
|
||||
}
|
||||
|
@ -946,11 +953,11 @@ public class BKDReader extends PointValues {
|
|||
|
||||
private void readCommonPrefixes(
|
||||
int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
int prefix = in.readVInt();
|
||||
commonPrefixLengths[dim] = prefix;
|
||||
if (prefix > 0) {
|
||||
in.readBytes(scratchPackedValue, dim * config.bytesPerDim, prefix);
|
||||
in.readBytes(scratchPackedValue, dim * config.bytesPerDim(), prefix);
|
||||
}
|
||||
// System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
|
||||
}
|
||||
|
@ -974,17 +981,17 @@ public class BKDReader extends PointValues {
|
|||
|
||||
@Override
|
||||
public int getNumDimensions() throws IOException {
|
||||
return config.numDims;
|
||||
return config.numDims();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getNumIndexDimensions() throws IOException {
|
||||
return config.numIndexDims;
|
||||
return config.numIndexDims();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBytesPerDimension() throws IOException {
|
||||
return config.bytesPerDim;
|
||||
return config.bytesPerDim();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -40,11 +40,11 @@ public final class HeapPointWriter implements PointWriter {
|
|||
|
||||
public HeapPointWriter(BKDConfig config, int size) {
|
||||
this.config = config;
|
||||
this.block = new byte[config.bytesPerDoc * size];
|
||||
this.block = new byte[config.bytesPerDoc() * size];
|
||||
this.size = size;
|
||||
this.dimComparator = ArrayUtil.getUnsignedComparator(config.bytesPerDim);
|
||||
this.dataDimsAndDocLength = config.bytesPerDoc - config.packedIndexBytesLength;
|
||||
this.scratch = new byte[config.bytesPerDoc];
|
||||
this.dimComparator = ArrayUtil.getUnsignedComparator(config.bytesPerDim());
|
||||
this.dataDimsAndDocLength = config.bytesPerDoc() - config.packedIndexBytesLength();
|
||||
this.scratch = new byte[config.bytesPerDoc()];
|
||||
if (size > 0) {
|
||||
pointValue = new HeapPointValue(config, block);
|
||||
} else {
|
||||
|
@ -56,23 +56,23 @@ public final class HeapPointWriter implements PointWriter {
|
|||
/** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */
|
||||
public PointValue getPackedValueSlice(int index) {
|
||||
assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index;
|
||||
pointValue.setOffset(index * config.bytesPerDoc);
|
||||
pointValue.setOffset(index * config.bytesPerDoc());
|
||||
return pointValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void append(byte[] packedValue, int docID) {
|
||||
assert closed == false : "point writer is already closed";
|
||||
assert packedValue.length == config.packedBytesLength
|
||||
assert packedValue.length == config.packedBytesLength()
|
||||
: "[packedValue] must have length ["
|
||||
+ config.packedBytesLength
|
||||
+ config.packedBytesLength()
|
||||
+ "] but was ["
|
||||
+ packedValue.length
|
||||
+ "]";
|
||||
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
|
||||
final int position = nextWrite * config.bytesPerDoc;
|
||||
System.arraycopy(packedValue, 0, block, position, config.packedBytesLength);
|
||||
BitUtil.VH_BE_INT.set(block, position + config.packedBytesLength, docID);
|
||||
final int position = nextWrite * config.bytesPerDoc();
|
||||
System.arraycopy(packedValue, 0, block, position, config.packedBytesLength());
|
||||
BitUtil.VH_BE_INT.set(block, position + config.packedBytesLength(), docID);
|
||||
nextWrite++;
|
||||
}
|
||||
|
||||
|
@ -81,33 +81,33 @@ public final class HeapPointWriter implements PointWriter {
|
|||
assert closed == false : "point writer is already closed";
|
||||
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
|
||||
final BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||
assert packedValueDocID.length == config.bytesPerDoc
|
||||
assert packedValueDocID.length == config.bytesPerDoc()
|
||||
: "[packedValue] must have length ["
|
||||
+ (config.bytesPerDoc)
|
||||
+ (config.bytesPerDoc())
|
||||
+ "] but was ["
|
||||
+ packedValueDocID.length
|
||||
+ "]";
|
||||
final int position = nextWrite * config.bytesPerDoc;
|
||||
final int position = nextWrite * config.bytesPerDoc();
|
||||
System.arraycopy(
|
||||
packedValueDocID.bytes, packedValueDocID.offset, block, position, config.bytesPerDoc);
|
||||
packedValueDocID.bytes, packedValueDocID.offset, block, position, config.bytesPerDoc());
|
||||
nextWrite++;
|
||||
}
|
||||
|
||||
/** Swaps the point at point {@code i} with the point at position {@code j} */
|
||||
void swap(int i, int j) {
|
||||
final int indexI = i * config.bytesPerDoc;
|
||||
final int indexJ = j * config.bytesPerDoc;
|
||||
final int indexI = i * config.bytesPerDoc();
|
||||
final int indexJ = j * config.bytesPerDoc();
|
||||
// scratch1 = values[i]
|
||||
System.arraycopy(block, indexI, scratch, 0, config.bytesPerDoc);
|
||||
System.arraycopy(block, indexI, scratch, 0, config.bytesPerDoc());
|
||||
// values[i] = values[j]
|
||||
System.arraycopy(block, indexJ, block, indexI, config.bytesPerDoc);
|
||||
System.arraycopy(block, indexJ, block, indexI, config.bytesPerDoc());
|
||||
// values[j] = scratch1
|
||||
System.arraycopy(scratch, 0, block, indexJ, config.bytesPerDoc);
|
||||
System.arraycopy(scratch, 0, block, indexJ, config.bytesPerDoc());
|
||||
}
|
||||
|
||||
/** Return the byte at position {@code k} of the point at position {@code i} */
|
||||
int byteAt(int i, int k) {
|
||||
return block[i * config.bytesPerDoc + k] & 0xff;
|
||||
return block[i * config.bytesPerDoc() + k] & 0xff;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -115,7 +115,7 @@ public final class HeapPointWriter implements PointWriter {
|
|||
* at the given offset
|
||||
*/
|
||||
void copyDim(int i, int dim, byte[] bytes, int offset) {
|
||||
System.arraycopy(block, i * config.bytesPerDoc + dim, bytes, offset, config.bytesPerDim);
|
||||
System.arraycopy(block, i * config.bytesPerDoc() + dim, bytes, offset, config.bytesPerDim());
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -125,7 +125,7 @@ public final class HeapPointWriter implements PointWriter {
|
|||
void copyDataDimsAndDoc(int i, byte[] bytes, int offset) {
|
||||
System.arraycopy(
|
||||
block,
|
||||
i * config.bytesPerDoc + config.packedIndexBytesLength,
|
||||
i * config.bytesPerDoc() + config.packedIndexBytesLength(),
|
||||
bytes,
|
||||
offset,
|
||||
dataDimsAndDocLength);
|
||||
|
@ -136,8 +136,8 @@ public final class HeapPointWriter implements PointWriter {
|
|||
* position {@code j}
|
||||
*/
|
||||
int compareDim(int i, int j, int dim) {
|
||||
final int iOffset = i * config.bytesPerDoc + dim;
|
||||
final int jOffset = j * config.bytesPerDoc + dim;
|
||||
final int iOffset = i * config.bytesPerDoc() + dim;
|
||||
final int jOffset = j * config.bytesPerDoc() + dim;
|
||||
return compareDim(block, iOffset, block, jOffset);
|
||||
}
|
||||
|
||||
|
@ -146,7 +146,7 @@ public final class HeapPointWriter implements PointWriter {
|
|||
* value
|
||||
*/
|
||||
int compareDim(int j, byte[] dimValue, int offset, int dim) {
|
||||
final int jOffset = j * config.bytesPerDoc + dim;
|
||||
final int jOffset = j * config.bytesPerDoc() + dim;
|
||||
return compareDim(dimValue, offset, block, jOffset);
|
||||
}
|
||||
|
||||
|
@ -159,8 +159,8 @@ public final class HeapPointWriter implements PointWriter {
|
|||
* at position {@code j}
|
||||
*/
|
||||
int compareDataDimsAndDoc(int i, int j) {
|
||||
final int iOffset = i * config.bytesPerDoc + config.packedIndexBytesLength;
|
||||
final int jOffset = j * config.bytesPerDoc + config.packedIndexBytesLength;
|
||||
final int iOffset = i * config.bytesPerDoc() + config.packedIndexBytesLength();
|
||||
final int jOffset = j * config.bytesPerDoc() + config.packedIndexBytesLength();
|
||||
return compareDataDimsAndDoc(block, iOffset, block, jOffset);
|
||||
}
|
||||
|
||||
|
@ -169,7 +169,7 @@ public final class HeapPointWriter implements PointWriter {
|
|||
* provided value
|
||||
*/
|
||||
int compareDataDimsAndDoc(int j, byte[] dataDimsAndDocs, int offset) {
|
||||
final int jOffset = j * config.bytesPerDoc + config.packedIndexBytesLength;
|
||||
final int jOffset = j * config.bytesPerDoc() + config.packedIndexBytesLength();
|
||||
return compareDataDimsAndDoc(dataDimsAndDocs, offset, block, jOffset);
|
||||
}
|
||||
|
||||
|
@ -187,11 +187,11 @@ public final class HeapPointWriter implements PointWriter {
|
|||
public int computeCardinality(int from, int to, int[] commonPrefixLengths) {
|
||||
int leafCardinality = 1;
|
||||
for (int i = from + 1; i < to; i++) {
|
||||
final int pointOffset = (i - 1) * config.bytesPerDoc;
|
||||
final int nextPointOffset = pointOffset + config.bytesPerDoc;
|
||||
for (int dim = 0; dim < config.numDims; dim++) {
|
||||
final int start = dim * config.bytesPerDim + commonPrefixLengths[dim];
|
||||
final int end = dim * config.bytesPerDim + config.bytesPerDim;
|
||||
final int pointOffset = (i - 1) * config.bytesPerDoc();
|
||||
final int nextPointOffset = pointOffset + config.bytesPerDoc();
|
||||
for (int dim = 0; dim < config.numDims(); dim++) {
|
||||
final int start = dim * config.bytesPerDim() + commonPrefixLengths[dim];
|
||||
final int end = dim * config.bytesPerDim() + config.bytesPerDim();
|
||||
if (Arrays.mismatch(
|
||||
block,
|
||||
nextPointOffset + start,
|
||||
|
@ -245,9 +245,9 @@ public final class HeapPointWriter implements PointWriter {
|
|||
private final int packedValueLength;
|
||||
|
||||
HeapPointValue(BKDConfig config, byte[] value) {
|
||||
this.packedValueLength = config.packedBytesLength;
|
||||
this.packedValueLength = config.packedBytesLength();
|
||||
this.packedValue = new BytesRef(value, 0, packedValueLength);
|
||||
this.packedValueDocID = new BytesRef(value, 0, config.bytesPerDoc);
|
||||
this.packedValueDocID = new BytesRef(value, 0, config.bytesPerDoc());
|
||||
}
|
||||
|
||||
/** Sets a new value by changing the offset. */
|
||||
|
|
|
@ -55,7 +55,7 @@ public final class MutablePointTreeReaderUtils {
|
|||
// This should be a common situation as IndexWriter accumulates data in doc ID order when
|
||||
// index sorting is not enabled.
|
||||
final int bitsPerDocId = sortedByDocID ? 0 : PackedInts.bitsRequired(maxDoc - 1);
|
||||
new StableMSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
|
||||
new StableMSBRadixSorter(config.packedBytesLength() + (bitsPerDocId + 7) / 8) {
|
||||
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
|
@ -74,10 +74,10 @@ public final class MutablePointTreeReaderUtils {
|
|||
|
||||
@Override
|
||||
protected int byteAt(int i, int k) {
|
||||
if (k < config.packedBytesLength) {
|
||||
if (k < config.packedBytesLength()) {
|
||||
return Byte.toUnsignedInt(reader.getByteAt(i, k));
|
||||
} else {
|
||||
final int shift = bitsPerDocId - ((k - config.packedBytesLength + 1) << 3);
|
||||
final int shift = bitsPerDocId - ((k - config.packedBytesLength() + 1) << 3);
|
||||
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
||||
}
|
||||
}
|
||||
|
@ -95,8 +95,8 @@ public final class MutablePointTreeReaderUtils {
|
|||
BytesRef scratch1,
|
||||
BytesRef scratch2) {
|
||||
|
||||
final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(config.bytesPerDim);
|
||||
final int start = sortedDim * config.bytesPerDim;
|
||||
final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(config.bytesPerDim());
|
||||
final int start = sortedDim * config.bytesPerDim();
|
||||
// No need for a fancy radix sort here, this is called on the leaves only so
|
||||
// there are not many values to sort
|
||||
new IntroSorter() {
|
||||
|
@ -125,11 +125,11 @@ public final class MutablePointTreeReaderUtils {
|
|||
cmp =
|
||||
Arrays.compareUnsigned(
|
||||
pivot.bytes,
|
||||
pivot.offset + config.packedIndexBytesLength,
|
||||
pivot.offset + config.packedBytesLength,
|
||||
pivot.offset + config.packedIndexBytesLength(),
|
||||
pivot.offset + config.packedBytesLength(),
|
||||
scratch2.bytes,
|
||||
scratch2.offset + config.packedIndexBytesLength,
|
||||
scratch2.offset + config.packedBytesLength);
|
||||
scratch2.offset + config.packedIndexBytesLength(),
|
||||
scratch2.offset + config.packedBytesLength());
|
||||
if (cmp == 0) {
|
||||
cmp = pivotDoc - reader.getDocID(j);
|
||||
}
|
||||
|
@ -154,23 +154,23 @@ public final class MutablePointTreeReaderUtils {
|
|||
int mid,
|
||||
BytesRef scratch1,
|
||||
BytesRef scratch2) {
|
||||
final int dimOffset = splitDim * config.bytesPerDim + commonPrefixLen;
|
||||
final int dimCmpBytes = config.bytesPerDim - commonPrefixLen;
|
||||
final int dimOffset = splitDim * config.bytesPerDim() + commonPrefixLen;
|
||||
final int dimCmpBytes = config.bytesPerDim() - commonPrefixLen;
|
||||
final int dataCmpBytes =
|
||||
(config.numDims - config.numIndexDims) * config.bytesPerDim + dimCmpBytes;
|
||||
(config.numDims() - config.numIndexDims()) * config.bytesPerDim() + dimCmpBytes;
|
||||
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
|
||||
new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) {
|
||||
|
||||
@Override
|
||||
protected Selector getFallbackSelector(int k) {
|
||||
final int dimStart = splitDim * config.bytesPerDim;
|
||||
final int dimStart = splitDim * config.bytesPerDim();
|
||||
final int dataStart =
|
||||
(k < dimCmpBytes)
|
||||
? config.packedIndexBytesLength
|
||||
: config.packedIndexBytesLength + k - dimCmpBytes;
|
||||
final int dataEnd = config.numDims * config.bytesPerDim;
|
||||
? config.packedIndexBytesLength()
|
||||
: config.packedIndexBytesLength() + k - dimCmpBytes;
|
||||
final int dataEnd = config.numDims() * config.bytesPerDim();
|
||||
final ByteArrayComparator dimComparator =
|
||||
ArrayUtil.getUnsignedComparator(config.bytesPerDim);
|
||||
ArrayUtil.getUnsignedComparator(config.bytesPerDim());
|
||||
return new IntroSelector() {
|
||||
|
||||
final BytesRef pivot = scratch1;
|
||||
|
@ -230,7 +230,7 @@ public final class MutablePointTreeReaderUtils {
|
|||
return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k));
|
||||
} else if (k < dataCmpBytes) {
|
||||
return Byte.toUnsignedInt(
|
||||
reader.getByteAt(i, config.packedIndexBytesLength + k - dimCmpBytes));
|
||||
reader.getByteAt(i, config.packedIndexBytesLength() + k - dimCmpBytes));
|
||||
} else {
|
||||
final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3);
|
||||
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
||||
|
|
|
@ -56,7 +56,7 @@ public final class OfflinePointReader implements PointReader {
|
|||
throws IOException {
|
||||
this.config = config;
|
||||
|
||||
if ((start + length) * config.bytesPerDoc + CodecUtil.footerLength()
|
||||
if ((start + length) * config.bytesPerDoc() + CodecUtil.footerLength()
|
||||
> tempDir.fileLength(tempFileName)) {
|
||||
throw new IllegalArgumentException(
|
||||
"requested slice is beyond the length of this file: start="
|
||||
|
@ -64,7 +64,7 @@ public final class OfflinePointReader implements PointReader {
|
|||
+ " length="
|
||||
+ length
|
||||
+ " bytesPerDoc="
|
||||
+ config.bytesPerDoc
|
||||
+ config.bytesPerDoc()
|
||||
+ " fileLength="
|
||||
+ tempDir.fileLength(tempFileName)
|
||||
+ " tempFileName="
|
||||
|
@ -73,15 +73,15 @@ public final class OfflinePointReader implements PointReader {
|
|||
if (reusableBuffer == null) {
|
||||
throw new IllegalArgumentException("[reusableBuffer] cannot be null");
|
||||
}
|
||||
if (reusableBuffer.length < config.bytesPerDoc) {
|
||||
if (reusableBuffer.length < config.bytesPerDoc()) {
|
||||
throw new IllegalArgumentException(
|
||||
"Length of [reusableBuffer] must be bigger than " + config.bytesPerDoc);
|
||||
"Length of [reusableBuffer] must be bigger than " + config.bytesPerDoc());
|
||||
}
|
||||
|
||||
this.maxPointOnHeap = reusableBuffer.length / config.bytesPerDoc;
|
||||
this.maxPointOnHeap = reusableBuffer.length / config.bytesPerDoc();
|
||||
// Best-effort checksumming:
|
||||
if (start == 0
|
||||
&& length * config.bytesPerDoc
|
||||
&& length * config.bytesPerDoc()
|
||||
== tempDir.fileLength(tempFileName) - CodecUtil.footerLength()) {
|
||||
// If we are going to read the entire file, e.g. because BKDWriter is now
|
||||
// partitioning it, we open with checksums:
|
||||
|
@ -96,7 +96,7 @@ public final class OfflinePointReader implements PointReader {
|
|||
|
||||
name = tempFileName;
|
||||
|
||||
long seekFP = start * config.bytesPerDoc;
|
||||
long seekFP = start * config.bytesPerDoc();
|
||||
in.seek(seekFP);
|
||||
countLeft = length;
|
||||
this.onHeapBuffer = reusableBuffer;
|
||||
|
@ -113,11 +113,11 @@ public final class OfflinePointReader implements PointReader {
|
|||
}
|
||||
try {
|
||||
if (countLeft > maxPointOnHeap) {
|
||||
in.readBytes(onHeapBuffer, 0, maxPointOnHeap * config.bytesPerDoc);
|
||||
in.readBytes(onHeapBuffer, 0, maxPointOnHeap * config.bytesPerDoc());
|
||||
pointsInBuffer = maxPointOnHeap - 1;
|
||||
countLeft -= maxPointOnHeap;
|
||||
} else {
|
||||
in.readBytes(onHeapBuffer, 0, (int) countLeft * config.bytesPerDoc);
|
||||
in.readBytes(onHeapBuffer, 0, (int) countLeft * config.bytesPerDoc());
|
||||
pointsInBuffer = Math.toIntExact(countLeft - 1);
|
||||
countLeft = 0;
|
||||
}
|
||||
|
@ -130,7 +130,7 @@ public final class OfflinePointReader implements PointReader {
|
|||
}
|
||||
} else {
|
||||
this.pointsInBuffer--;
|
||||
this.offset += config.bytesPerDoc;
|
||||
this.offset += config.bytesPerDoc();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -162,9 +162,9 @@ public final class OfflinePointReader implements PointReader {
|
|||
final int packedValueLength;
|
||||
|
||||
OfflinePointValue(BKDConfig config, byte[] value) {
|
||||
this.packedValueLength = config.packedBytesLength;
|
||||
this.packedValueLength = config.packedBytesLength();
|
||||
this.packedValue = new BytesRef(value, 0, packedValueLength);
|
||||
this.packedValueDocID = new BytesRef(value, 0, config.bytesPerDoc);
|
||||
this.packedValueDocID = new BytesRef(value, 0, config.bytesPerDoc());
|
||||
}
|
||||
|
||||
/** Sets a new value by changing the offset. */
|
||||
|
|
|
@ -56,9 +56,9 @@ public final class OfflinePointWriter implements PointWriter {
|
|||
@Override
|
||||
public void append(byte[] packedValue, int docID) throws IOException {
|
||||
assert closed == false : "Point writer is already closed";
|
||||
assert packedValue.length == config.packedBytesLength
|
||||
assert packedValue.length == config.packedBytesLength()
|
||||
: "[packedValue] must have length ["
|
||||
+ config.packedBytesLength
|
||||
+ config.packedBytesLength()
|
||||
+ "] but was ["
|
||||
+ packedValue.length
|
||||
+ "]";
|
||||
|
@ -75,9 +75,9 @@ public final class OfflinePointWriter implements PointWriter {
|
|||
public void append(PointValue pointValue) throws IOException {
|
||||
assert closed == false : "Point writer is already closed";
|
||||
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||
assert packedValueDocID.length == config.bytesPerDoc
|
||||
assert packedValueDocID.length == config.bytesPerDoc()
|
||||
: "[packedValue and docID] must have length ["
|
||||
+ (config.bytesPerDoc)
|
||||
+ (config.bytesPerDoc())
|
||||
+ "] but was ["
|
||||
+ packedValueDocID.length
|
||||
+ "]";
|
||||
|
@ -89,7 +89,7 @@ public final class OfflinePointWriter implements PointWriter {
|
|||
|
||||
@Override
|
||||
public PointReader getReader(long start, long length) throws IOException {
|
||||
byte[] buffer = new byte[config.bytesPerDoc];
|
||||
byte[] buffer = new byte[config.bytesPerDoc()];
|
||||
return getReader(start, length, buffer);
|
||||
}
|
||||
|
||||
|
|
|
@ -954,22 +954,22 @@ public class TestBKD extends LuceneTestCase {
|
|||
@Override
|
||||
public void visit(int docID, byte[] packedValue) {
|
||||
// System.out.println("visit check docID=" + docID);
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
dim * config.bytesPerDim,
|
||||
dim * config.bytesPerDim + config.bytesPerDim,
|
||||
dim * config.bytesPerDim(),
|
||||
dim * config.bytesPerDim() + config.bytesPerDim(),
|
||||
queryMin[dim],
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
< 0
|
||||
|| Arrays.compareUnsigned(
|
||||
packedValue,
|
||||
dim * config.bytesPerDim,
|
||||
dim * config.bytesPerDim + config.bytesPerDim,
|
||||
dim * config.bytesPerDim(),
|
||||
dim * config.bytesPerDim() + config.bytesPerDim(),
|
||||
queryMax[dim],
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
> 0) {
|
||||
// System.out.println(" no");
|
||||
return;
|
||||
|
@ -1005,39 +1005,39 @@ public class TestBKD extends LuceneTestCase {
|
|||
@Override
|
||||
public Relation compare(byte[] minPacked, byte[] maxPacked) {
|
||||
boolean crosses = false;
|
||||
for (int dim = 0; dim < config.numIndexDims; dim++) {
|
||||
for (int dim = 0; dim < config.numIndexDims(); dim++) {
|
||||
if (Arrays.compareUnsigned(
|
||||
maxPacked,
|
||||
dim * config.bytesPerDim,
|
||||
dim * config.bytesPerDim + config.bytesPerDim,
|
||||
dim * config.bytesPerDim(),
|
||||
dim * config.bytesPerDim() + config.bytesPerDim(),
|
||||
queryMin[dim],
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
< 0
|
||||
|| Arrays.compareUnsigned(
|
||||
minPacked,
|
||||
dim * config.bytesPerDim,
|
||||
dim * config.bytesPerDim + config.bytesPerDim,
|
||||
dim * config.bytesPerDim(),
|
||||
dim * config.bytesPerDim() + config.bytesPerDim(),
|
||||
queryMax[dim],
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
> 0) {
|
||||
return Relation.CELL_OUTSIDE_QUERY;
|
||||
} else if (Arrays.compareUnsigned(
|
||||
minPacked,
|
||||
dim * config.bytesPerDim,
|
||||
dim * config.bytesPerDim + config.bytesPerDim,
|
||||
dim * config.bytesPerDim(),
|
||||
dim * config.bytesPerDim() + config.bytesPerDim(),
|
||||
queryMin[dim],
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
< 0
|
||||
|| Arrays.compareUnsigned(
|
||||
maxPacked,
|
||||
dim * config.bytesPerDim,
|
||||
dim * config.bytesPerDim + config.bytesPerDim,
|
||||
dim * config.bytesPerDim(),
|
||||
dim * config.bytesPerDim() + config.bytesPerDim(),
|
||||
queryMax[dim],
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
> 0) {
|
||||
crosses = true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.util.bkd;
|
||||
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
public class TestBKDConfig extends LuceneTestCase {
|
||||
|
||||
public void testInvalidNumDims() {
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new BKDConfig(0, 0, 8, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE));
|
||||
assertTrue(ex.getMessage().contains("numDims must be 1 .. " + BKDConfig.MAX_DIMS));
|
||||
}
|
||||
|
||||
public void testInvalidNumIndexedDims() {
|
||||
{
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new BKDConfig(1, 0, 8, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE));
|
||||
assertTrue(ex.getMessage().contains("numIndexDims must be 1 .. " + BKDConfig.MAX_INDEX_DIMS));
|
||||
}
|
||||
{
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new BKDConfig(1, 2, 8, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE));
|
||||
assertTrue(ex.getMessage().contains("numIndexDims cannot exceed numDims"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testInvalidBytesPerDim() {
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new BKDConfig(1, 1, 0, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE));
|
||||
assertTrue(ex.getMessage().contains("bytesPerDim must be > 0"));
|
||||
}
|
||||
|
||||
public void testInvalidMaxPointsPerLeafNode() {
|
||||
{
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(IllegalArgumentException.class, () -> new BKDConfig(1, 1, 8, -1));
|
||||
assertTrue(ex.getMessage().contains("maxPointsInLeafNode must be > 0"));
|
||||
}
|
||||
{
|
||||
IllegalArgumentException ex =
|
||||
expectThrows(
|
||||
IllegalArgumentException.class,
|
||||
() -> new BKDConfig(1, 1, 8, ArrayUtil.MAX_ARRAY_LENGTH + 1));
|
||||
assertTrue(
|
||||
ex.getMessage().contains("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH"));
|
||||
}
|
||||
}
|
||||
}
|
|
@ -37,7 +37,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
new BKDConfig(
|
||||
dimensions, dimensions, bytesPerDimensions, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
|
||||
PointWriter points = getRandomPointWriter(config, dir, values);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
NumericUtils.intToSortableBytes(1, value, 0);
|
||||
points.append(value, 0);
|
||||
NumericUtils.intToSortableBytes(2, value, 0);
|
||||
|
@ -81,7 +81,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
int partitionPoint = TestUtil.nextInt(random(), start + 1, end - 1);
|
||||
int sortedOnHeap = random().nextInt(5000);
|
||||
PointWriter points = getRandomPointWriter(config, dir, values);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
for (int i = 0; i < values; i++) {
|
||||
random().nextBytes(value);
|
||||
points.append(value, i);
|
||||
|
@ -102,7 +102,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
int partitionPoint = random().nextInt(values);
|
||||
int sortedOnHeap = random().nextInt(5000);
|
||||
PointWriter points = getRandomPointWriter(config, dir, values);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
random().nextBytes(value);
|
||||
for (int i = 0; i < values; i++) {
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -123,7 +123,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
int sortedOnHeap = random().nextInt(5000);
|
||||
BKDConfig config = getRandomConfig();
|
||||
PointWriter points = getRandomPointWriter(config, dir, values);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
random().nextBytes(value);
|
||||
for (int i = 0; i < values; i++) {
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -144,7 +144,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
int sortedOnHeap = random().nextInt(5000);
|
||||
BKDConfig config = getRandomConfig();
|
||||
PointWriter points = getRandomPointWriter(config, dir, values);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
random().nextBytes(value);
|
||||
for (int i = 0; i < values; i++) {
|
||||
points.append(value, 0);
|
||||
|
@ -162,7 +162,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
int sortedOnHeap = random().nextInt(5000);
|
||||
PointWriter points = getRandomPointWriter(config, dir, values);
|
||||
int numberValues = random().nextInt(8) + 2;
|
||||
byte[][] differentValues = new byte[numberValues][config.packedBytesLength];
|
||||
byte[][] differentValues = new byte[numberValues][config.packedBytesLength()];
|
||||
for (int i = 0; i < numberValues; i++) {
|
||||
random().nextBytes(differentValues[i]);
|
||||
}
|
||||
|
@ -181,9 +181,9 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
int partitionPoint = random().nextInt(values);
|
||||
int sortedOnHeap = random().nextInt(5000);
|
||||
PointWriter points = getRandomPointWriter(config, dir, values);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
int dataOnlyDims = config.numDims - config.numIndexDims;
|
||||
byte[] dataValue = new byte[dataOnlyDims * config.bytesPerDim];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
int dataOnlyDims = config.numDims() - config.numIndexDims();
|
||||
byte[] dataValue = new byte[dataOnlyDims * config.bytesPerDim()];
|
||||
random().nextBytes(value);
|
||||
for (int i = 0; i < values; i++) {
|
||||
random().nextBytes(dataValue);
|
||||
|
@ -191,8 +191,8 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
dataValue,
|
||||
0,
|
||||
value,
|
||||
config.numIndexDims * config.bytesPerDim,
|
||||
dataOnlyDims * config.bytesPerDim);
|
||||
config.numIndexDims() * config.bytesPerDim(),
|
||||
dataOnlyDims * config.bytesPerDim());
|
||||
points.append(value, i);
|
||||
}
|
||||
points.close();
|
||||
|
@ -210,9 +210,9 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
int sortedOnHeap)
|
||||
throws IOException {
|
||||
BKDRadixSelector radixSelector = new BKDRadixSelector(config, sortedOnHeap, dir, "test");
|
||||
int dataOnlyDims = config.numDims - config.numIndexDims;
|
||||
int dataOnlyDims = config.numDims() - config.numIndexDims();
|
||||
// we only split by indexed dimensions, so we check only those dimensions
|
||||
for (int splitDim = 0; splitDim < config.numIndexDims; splitDim++) {
|
||||
for (int splitDim = 0; splitDim < config.numIndexDims(); splitDim++) {
|
||||
// We need to make a copy of the data as it is deleted in the process
|
||||
BKDRadixSelector.PathSlice inputSlice =
|
||||
new BKDRadixSelector.PathSlice(copyPoints(config, dir, points), 0, points.count());
|
||||
|
@ -226,7 +226,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
// check that left and right slices contain the correct points
|
||||
byte[] max = getMax(config, slices[0], splitDim);
|
||||
byte[] min = getMin(config, slices[1], splitDim);
|
||||
int cmp = Arrays.compareUnsigned(max, 0, config.bytesPerDim, min, 0, config.bytesPerDim);
|
||||
int cmp = Arrays.compareUnsigned(max, 0, config.bytesPerDim(), min, 0, config.bytesPerDim());
|
||||
assertTrue(cmp <= 0);
|
||||
if (cmp == 0) {
|
||||
byte[] maxDataDim = getMaxDataDimension(config, slices[0], max, splitDim);
|
||||
|
@ -235,10 +235,10 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
Arrays.compareUnsigned(
|
||||
maxDataDim,
|
||||
0,
|
||||
dataOnlyDims * config.bytesPerDim,
|
||||
dataOnlyDims * config.bytesPerDim(),
|
||||
minDataDim,
|
||||
0,
|
||||
dataOnlyDims * config.bytesPerDim);
|
||||
dataOnlyDims * config.bytesPerDim());
|
||||
assertTrue(cmp <= 0);
|
||||
if (cmp == 0) {
|
||||
int maxDocID = getMaxDocId(config, slices[0], splitDim, partitionPoint, maxDataDim);
|
||||
|
@ -270,9 +270,9 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
byte[] pointsMax = getMax(config, inputSlice, splitDim);
|
||||
byte[] pointsMin = getMin(config, inputSlice, splitDim);
|
||||
int commonPrefixLength =
|
||||
Arrays.mismatch(pointsMin, 0, config.bytesPerDim, pointsMax, 0, config.bytesPerDim);
|
||||
Arrays.mismatch(pointsMin, 0, config.bytesPerDim(), pointsMax, 0, config.bytesPerDim());
|
||||
if (commonPrefixLength == -1) {
|
||||
commonPrefixLength = config.bytesPerDim;
|
||||
commonPrefixLength = config.bytesPerDim();
|
||||
}
|
||||
return (random().nextBoolean())
|
||||
? commonPrefixLength
|
||||
|
@ -300,22 +300,23 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
|
||||
private byte[] getMin(BKDConfig config, BKDRadixSelector.PathSlice pathSlice, int dimension)
|
||||
throws IOException {
|
||||
byte[] min = new byte[config.bytesPerDim];
|
||||
byte[] min = new byte[config.bytesPerDim()];
|
||||
Arrays.fill(min, (byte) 0xff);
|
||||
try (PointReader reader = pathSlice.writer.getReader(pathSlice.start, pathSlice.count)) {
|
||||
byte[] value = new byte[config.bytesPerDim];
|
||||
byte[] value = new byte[config.bytesPerDim()];
|
||||
|
||||
while (reader.next()) {
|
||||
PointValue pointValue = reader.pointValue();
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
System.arraycopy(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + dimension * config.bytesPerDim,
|
||||
packedValue.offset + dimension * config.bytesPerDim(),
|
||||
value,
|
||||
0,
|
||||
config.bytesPerDim);
|
||||
if (Arrays.compareUnsigned(min, 0, config.bytesPerDim, value, 0, config.bytesPerDim) > 0) {
|
||||
System.arraycopy(value, 0, min, 0, config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
if (Arrays.compareUnsigned(min, 0, config.bytesPerDim(), value, 0, config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(value, 0, min, 0, config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -334,16 +335,16 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
while (reader.next()) {
|
||||
PointValue pointValue = reader.pointValue();
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
int offset = dimension * config.bytesPerDim;
|
||||
int dataOffset = config.packedIndexBytesLength;
|
||||
int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
|
||||
int offset = dimension * config.bytesPerDim();
|
||||
int dataOffset = config.packedIndexBytesLength();
|
||||
int dataLength = (config.numDims() - config.numIndexDims()) * config.bytesPerDim();
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim,
|
||||
packedValue.offset + offset + config.bytesPerDim(),
|
||||
partitionPoint,
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
== 0
|
||||
&& Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
|
@ -366,38 +367,38 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
private byte[] getMinDataDimension(
|
||||
BKDConfig config, BKDRadixSelector.PathSlice p, byte[] minDim, int splitDim)
|
||||
throws IOException {
|
||||
final int numDataDims = config.numDims - config.numIndexDims;
|
||||
byte[] min = new byte[numDataDims * config.bytesPerDim];
|
||||
final int numDataDims = config.numDims() - config.numIndexDims();
|
||||
byte[] min = new byte[numDataDims * config.bytesPerDim()];
|
||||
Arrays.fill(min, (byte) 0xff);
|
||||
int offset = splitDim * config.bytesPerDim;
|
||||
int offset = splitDim * config.bytesPerDim();
|
||||
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
||||
byte[] value = new byte[numDataDims * config.bytesPerDim];
|
||||
byte[] value = new byte[numDataDims * config.bytesPerDim()];
|
||||
while (reader.next()) {
|
||||
PointValue pointValue = reader.pointValue();
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
if (Arrays.mismatch(
|
||||
minDim,
|
||||
0,
|
||||
config.bytesPerDim,
|
||||
config.bytesPerDim(),
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim)
|
||||
packedValue.offset + offset + config.bytesPerDim())
|
||||
== -1) {
|
||||
System.arraycopy(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + config.numIndexDims * config.bytesPerDim,
|
||||
packedValue.offset + config.numIndexDims() * config.bytesPerDim(),
|
||||
value,
|
||||
0,
|
||||
numDataDims * config.bytesPerDim);
|
||||
numDataDims * config.bytesPerDim());
|
||||
if (Arrays.compareUnsigned(
|
||||
min,
|
||||
0,
|
||||
numDataDims * config.bytesPerDim,
|
||||
numDataDims * config.bytesPerDim(),
|
||||
value,
|
||||
0,
|
||||
numDataDims * config.bytesPerDim)
|
||||
numDataDims * config.bytesPerDim())
|
||||
> 0) {
|
||||
System.arraycopy(value, 0, min, 0, numDataDims * config.bytesPerDim);
|
||||
System.arraycopy(value, 0, min, 0, numDataDims * config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -407,21 +408,22 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
|
||||
private byte[] getMax(BKDConfig config, BKDRadixSelector.PathSlice p, int dimension)
|
||||
throws IOException {
|
||||
byte[] max = new byte[config.bytesPerDim];
|
||||
byte[] max = new byte[config.bytesPerDim()];
|
||||
Arrays.fill(max, (byte) 0);
|
||||
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
||||
byte[] value = new byte[config.bytesPerDim];
|
||||
byte[] value = new byte[config.bytesPerDim()];
|
||||
while (reader.next()) {
|
||||
PointValue pointValue = reader.pointValue();
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
System.arraycopy(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + dimension * config.bytesPerDim,
|
||||
packedValue.offset + dimension * config.bytesPerDim(),
|
||||
value,
|
||||
0,
|
||||
config.bytesPerDim);
|
||||
if (Arrays.compareUnsigned(max, 0, config.bytesPerDim, value, 0, config.bytesPerDim) < 0) {
|
||||
System.arraycopy(value, 0, max, 0, config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
if (Arrays.compareUnsigned(max, 0, config.bytesPerDim(), value, 0, config.bytesPerDim())
|
||||
< 0) {
|
||||
System.arraycopy(value, 0, max, 0, config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -431,38 +433,38 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
private byte[] getMaxDataDimension(
|
||||
BKDConfig config, BKDRadixSelector.PathSlice p, byte[] maxDim, int splitDim)
|
||||
throws IOException {
|
||||
final int numDataDims = config.numDims - config.numIndexDims;
|
||||
byte[] max = new byte[numDataDims * config.bytesPerDim];
|
||||
final int numDataDims = config.numDims() - config.numIndexDims();
|
||||
byte[] max = new byte[numDataDims * config.bytesPerDim()];
|
||||
Arrays.fill(max, (byte) 0);
|
||||
int offset = splitDim * config.bytesPerDim;
|
||||
int offset = splitDim * config.bytesPerDim();
|
||||
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
||||
byte[] value = new byte[numDataDims * config.bytesPerDim];
|
||||
byte[] value = new byte[numDataDims * config.bytesPerDim()];
|
||||
while (reader.next()) {
|
||||
PointValue pointValue = reader.pointValue();
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
if (Arrays.mismatch(
|
||||
maxDim,
|
||||
0,
|
||||
config.bytesPerDim,
|
||||
config.bytesPerDim(),
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim)
|
||||
packedValue.offset + offset + config.bytesPerDim())
|
||||
== -1) {
|
||||
System.arraycopy(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + config.packedIndexBytesLength,
|
||||
packedValue.offset + config.packedIndexBytesLength(),
|
||||
value,
|
||||
0,
|
||||
numDataDims * config.bytesPerDim);
|
||||
numDataDims * config.bytesPerDim());
|
||||
if (Arrays.compareUnsigned(
|
||||
max,
|
||||
0,
|
||||
numDataDims * config.bytesPerDim,
|
||||
numDataDims * config.bytesPerDim(),
|
||||
value,
|
||||
0,
|
||||
numDataDims * config.bytesPerDim)
|
||||
numDataDims * config.bytesPerDim())
|
||||
< 0) {
|
||||
System.arraycopy(value, 0, max, 0, numDataDims * config.bytesPerDim);
|
||||
System.arraycopy(value, 0, max, 0, numDataDims * config.bytesPerDim());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -482,16 +484,16 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
|||
while (reader.next()) {
|
||||
PointValue pointValue = reader.pointValue();
|
||||
BytesRef packedValue = pointValue.packedValue();
|
||||
int offset = dimension * config.bytesPerDim;
|
||||
int dataOffset = config.packedIndexBytesLength;
|
||||
int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
|
||||
int offset = dimension * config.bytesPerDim();
|
||||
int dataOffset = config.packedIndexBytesLength();
|
||||
int dataLength = (config.numDims() - config.numIndexDims()) * config.bytesPerDim();
|
||||
if (Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
packedValue.offset + offset,
|
||||
packedValue.offset + offset + config.bytesPerDim,
|
||||
packedValue.offset + offset + config.bytesPerDim(),
|
||||
partitionPoint,
|
||||
0,
|
||||
config.bytesPerDim)
|
||||
config.bytesPerDim())
|
||||
== 0
|
||||
&& Arrays.compareUnsigned(
|
||||
packedValue.bytes,
|
||||
|
|
|
@ -30,7 +30,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
BKDConfig config = getRandomConfig();
|
||||
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
|
||||
HeapPointWriter points = new HeapPointWriter(config, numPoints);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
for (int i = 0; i < numPoints; i++) {
|
||||
random().nextBytes(value);
|
||||
points.append(value, i);
|
||||
|
@ -42,7 +42,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
BKDConfig config = getRandomConfig();
|
||||
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
|
||||
HeapPointWriter points = new HeapPointWriter(config, numPoints);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
random().nextBytes(value);
|
||||
for (int i = 0; i < numPoints; i++) {
|
||||
points.append(value, random().nextInt(numPoints));
|
||||
|
@ -54,7 +54,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
BKDConfig config = getRandomConfig();
|
||||
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
|
||||
HeapPointWriter points = new HeapPointWriter(config, numPoints);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
random().nextBytes(value);
|
||||
for (int i = 0; i < numPoints; i++) {
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -71,7 +71,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
|
||||
HeapPointWriter points = new HeapPointWriter(config, numPoints);
|
||||
int numberValues = random().nextInt(8) + 2;
|
||||
byte[][] differentValues = new byte[numberValues][config.packedBytesLength];
|
||||
byte[][] differentValues = new byte[numberValues][config.packedBytesLength()];
|
||||
for (int i = 0; i < numberValues; i++) {
|
||||
random().nextBytes(differentValues[i]);
|
||||
}
|
||||
|
@ -85,9 +85,9 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
BKDConfig config = getRandomConfig();
|
||||
int numPoints = TestUtil.nextInt(random(), 1, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
|
||||
HeapPointWriter points = new HeapPointWriter(config, numPoints);
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
int totalDataDimension = config.numDims - config.numIndexDims;
|
||||
byte[] dataDimensionValues = new byte[totalDataDimension * config.bytesPerDim];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
int totalDataDimension = config.numDims() - config.numIndexDims();
|
||||
byte[] dataDimensionValues = new byte[totalDataDimension * config.bytesPerDim()];
|
||||
random().nextBytes(value);
|
||||
for (int i = 0; i < numPoints; i++) {
|
||||
random().nextBytes(dataDimensionValues);
|
||||
|
@ -95,8 +95,8 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
dataDimensionValues,
|
||||
0,
|
||||
value,
|
||||
config.packedIndexBytesLength,
|
||||
totalDataDimension * config.bytesPerDim);
|
||||
config.packedIndexBytesLength(),
|
||||
totalDataDimension * config.bytesPerDim());
|
||||
points.append(value, random().nextInt(numPoints));
|
||||
}
|
||||
verifySort(config, points, 0, numPoints);
|
||||
|
@ -107,17 +107,17 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
Directory dir = newDirectory();
|
||||
BKDRadixSelector radixSelector = new BKDRadixSelector(config, 1000, dir, "test");
|
||||
// we check for each dimension
|
||||
for (int splitDim = 0; splitDim < config.numDims; splitDim++) {
|
||||
for (int splitDim = 0; splitDim < config.numDims(); splitDim++) {
|
||||
radixSelector.heapRadixSort(
|
||||
points,
|
||||
start,
|
||||
end,
|
||||
splitDim,
|
||||
getRandomCommonPrefix(config, points, start, end, splitDim));
|
||||
byte[] previous = new byte[config.packedBytesLength];
|
||||
byte[] previous = new byte[config.packedBytesLength()];
|
||||
int previousDocId = -1;
|
||||
Arrays.fill(previous, (byte) 0);
|
||||
int dimOffset = splitDim * config.bytesPerDim;
|
||||
int dimOffset = splitDim * config.bytesPerDim();
|
||||
for (int j = start; j < end; j++) {
|
||||
PointValue pointValue = points.getPackedValueSlice(j);
|
||||
BytesRef value = pointValue.packedValue();
|
||||
|
@ -125,27 +125,27 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
Arrays.compareUnsigned(
|
||||
value.bytes,
|
||||
value.offset + dimOffset,
|
||||
value.offset + dimOffset + config.bytesPerDim,
|
||||
value.offset + dimOffset + config.bytesPerDim(),
|
||||
previous,
|
||||
dimOffset,
|
||||
dimOffset + config.bytesPerDim);
|
||||
dimOffset + config.bytesPerDim());
|
||||
assertTrue(cmp >= 0);
|
||||
if (cmp == 0) {
|
||||
int dataOffset = config.numIndexDims * config.bytesPerDim;
|
||||
int dataOffset = config.numIndexDims() * config.bytesPerDim();
|
||||
cmp =
|
||||
Arrays.compareUnsigned(
|
||||
value.bytes,
|
||||
value.offset + dataOffset,
|
||||
value.offset + config.packedBytesLength,
|
||||
value.offset + config.packedBytesLength(),
|
||||
previous,
|
||||
dataOffset,
|
||||
config.packedBytesLength);
|
||||
config.packedBytesLength());
|
||||
assertTrue(cmp >= 0);
|
||||
}
|
||||
if (cmp == 0) {
|
||||
assertTrue(pointValue.docID() >= previousDocId);
|
||||
}
|
||||
System.arraycopy(value.bytes, value.offset, previous, 0, config.packedBytesLength);
|
||||
System.arraycopy(value.bytes, value.offset, previous, 0, config.packedBytesLength());
|
||||
previousDocId = pointValue.docID();
|
||||
}
|
||||
}
|
||||
|
@ -155,12 +155,12 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
/** Returns a common prefix length equal to or lower than the current one. */
|
||||
private int getRandomCommonPrefix(
|
||||
BKDConfig config, HeapPointWriter points, int start, int end, int sortDim) {
|
||||
int commonPrefixLength = config.bytesPerDim;
|
||||
int commonPrefixLength = config.bytesPerDim();
|
||||
PointValue value = points.getPackedValueSlice(start);
|
||||
BytesRef bytesRef = value.packedValue();
|
||||
byte[] firstValue = new byte[config.bytesPerDim];
|
||||
int offset = sortDim * config.bytesPerDim;
|
||||
System.arraycopy(bytesRef.bytes, bytesRef.offset + offset, firstValue, 0, config.bytesPerDim);
|
||||
byte[] firstValue = new byte[config.bytesPerDim()];
|
||||
int offset = sortDim * config.bytesPerDim();
|
||||
System.arraycopy(bytesRef.bytes, bytesRef.offset + offset, firstValue, 0, config.bytesPerDim());
|
||||
for (int i = start + 1; i < end; i++) {
|
||||
value = points.getPackedValueSlice(i);
|
||||
bytesRef = value.packedValue();
|
||||
|
@ -168,10 +168,10 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
|||
Arrays.mismatch(
|
||||
bytesRef.bytes,
|
||||
bytesRef.offset + offset,
|
||||
bytesRef.offset + offset + config.bytesPerDim,
|
||||
bytesRef.offset + offset + config.bytesPerDim(),
|
||||
firstValue,
|
||||
0,
|
||||
config.bytesPerDim);
|
||||
config.bytesPerDim());
|
||||
if (diff != -1 && commonPrefixLength > diff) {
|
||||
if (diff == 0) {
|
||||
return diff;
|
||||
|
|
|
@ -87,10 +87,10 @@ public class TestMutablePointTreeReaderUtils extends LuceneTestCase {
|
|||
private void doTestSortByDim() {
|
||||
BKDConfig config = createRandomConfig();
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
int[] commonPrefixLengths = new int[config.numDims];
|
||||
int[] commonPrefixLengths = new int[config.numDims()];
|
||||
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
final int sortedDim = random().nextInt(config.numIndexDims);
|
||||
final int sortedDim = random().nextInt(config.numIndexDims());
|
||||
MutablePointTreeReaderUtils.sortByDim(
|
||||
config,
|
||||
sortedDim,
|
||||
|
@ -101,20 +101,20 @@ public class TestMutablePointTreeReaderUtils extends LuceneTestCase {
|
|||
new BytesRef(),
|
||||
new BytesRef());
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
final int offset = sortedDim * config.bytesPerDim;
|
||||
final int offset = sortedDim * config.bytesPerDim();
|
||||
BytesRef previousValue = reader.points[i - 1].packedValue;
|
||||
BytesRef currentValue = reader.points[i].packedValue;
|
||||
int cmp =
|
||||
Arrays.compareUnsigned(
|
||||
previousValue.bytes,
|
||||
previousValue.offset + offset,
|
||||
previousValue.offset + offset + config.bytesPerDim,
|
||||
previousValue.offset + offset + config.bytesPerDim(),
|
||||
currentValue.bytes,
|
||||
currentValue.offset + offset,
|
||||
currentValue.offset + offset + config.bytesPerDim);
|
||||
currentValue.offset + offset + config.bytesPerDim());
|
||||
if (cmp == 0) {
|
||||
int dataDimOffset = config.packedIndexBytesLength;
|
||||
int dataDimsLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
|
||||
int dataDimOffset = config.packedIndexBytesLength();
|
||||
int dataDimsLength = (config.numDims() - config.numIndexDims()) * config.bytesPerDim();
|
||||
cmp =
|
||||
Arrays.compareUnsigned(
|
||||
previousValue.bytes,
|
||||
|
@ -139,10 +139,10 @@ public class TestMutablePointTreeReaderUtils extends LuceneTestCase {
|
|||
|
||||
private void doTestPartition() {
|
||||
BKDConfig config = createRandomConfig();
|
||||
int[] commonPrefixLengths = new int[config.numDims];
|
||||
int[] commonPrefixLengths = new int[config.numDims()];
|
||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||
Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
|
||||
final int splitDim = random().nextInt(config.numIndexDims);
|
||||
final int splitDim = random().nextInt(config.numIndexDims());
|
||||
DummyPointsReader reader = new DummyPointsReader(points);
|
||||
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
|
||||
MutablePointTreeReaderUtils.partition(
|
||||
|
@ -157,20 +157,20 @@ public class TestMutablePointTreeReaderUtils extends LuceneTestCase {
|
|||
new BytesRef(),
|
||||
new BytesRef());
|
||||
BytesRef pivotValue = reader.points[pivot].packedValue;
|
||||
int offset = splitDim * config.bytesPerDim;
|
||||
int offset = splitDim * config.bytesPerDim();
|
||||
for (int i = 0; i < points.length; ++i) {
|
||||
BytesRef value = reader.points[i].packedValue;
|
||||
int cmp =
|
||||
Arrays.compareUnsigned(
|
||||
value.bytes,
|
||||
value.offset + offset,
|
||||
value.offset + offset + config.bytesPerDim,
|
||||
value.offset + offset + config.bytesPerDim(),
|
||||
pivotValue.bytes,
|
||||
pivotValue.offset + offset,
|
||||
pivotValue.offset + offset + config.bytesPerDim);
|
||||
pivotValue.offset + offset + config.bytesPerDim());
|
||||
if (cmp == 0) {
|
||||
int dataDimOffset = config.packedIndexBytesLength;
|
||||
int dataDimsLength = (config.numDims - config.numIndexDims) * config.bytesPerDim;
|
||||
int dataDimOffset = config.packedIndexBytesLength();
|
||||
int dataDimsLength = (config.numDims() - config.numIndexDims()) * config.bytesPerDim();
|
||||
cmp =
|
||||
Arrays.compareUnsigned(
|
||||
value.bytes,
|
||||
|
@ -203,24 +203,24 @@ public class TestMutablePointTreeReaderUtils extends LuceneTestCase {
|
|||
|
||||
private static Point[] createRandomPoints(
|
||||
BKDConfig config, int maxDoc, int[] commonPrefixLengths, boolean isDocIdIncremental) {
|
||||
assertTrue(commonPrefixLengths.length == config.numDims);
|
||||
assertTrue(commonPrefixLengths.length == config.numDims());
|
||||
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
|
||||
Point[] points = new Point[numPoints];
|
||||
if (random().nextInt(10) != 0) {
|
||||
for (int i = 0; i < numPoints; ++i) {
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
random().nextBytes(value);
|
||||
points[i] =
|
||||
new Point(
|
||||
value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
|
||||
}
|
||||
for (int i = 0; i < config.numDims; ++i) {
|
||||
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim);
|
||||
for (int i = 0; i < config.numDims(); ++i) {
|
||||
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim());
|
||||
}
|
||||
BytesRef firstValue = points[0].packedValue;
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
for (int dim = 0; dim < config.numDims; ++dim) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
for (int dim = 0; dim < config.numDims(); ++dim) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
System.arraycopy(
|
||||
firstValue.bytes,
|
||||
|
@ -232,30 +232,34 @@ public class TestMutablePointTreeReaderUtils extends LuceneTestCase {
|
|||
}
|
||||
} else {
|
||||
// index dim are equal, data dims different
|
||||
int numDataDims = config.numDims - config.numIndexDims;
|
||||
byte[] indexDims = new byte[config.packedIndexBytesLength];
|
||||
int numDataDims = config.numDims() - config.numIndexDims();
|
||||
byte[] indexDims = new byte[config.packedIndexBytesLength()];
|
||||
random().nextBytes(indexDims);
|
||||
byte[] dataDims = new byte[numDataDims * config.bytesPerDim];
|
||||
byte[] dataDims = new byte[numDataDims * config.bytesPerDim()];
|
||||
for (int i = 0; i < numPoints; ++i) {
|
||||
byte[] value = new byte[config.packedBytesLength];
|
||||
System.arraycopy(indexDims, 0, value, 0, config.packedIndexBytesLength);
|
||||
byte[] value = new byte[config.packedBytesLength()];
|
||||
System.arraycopy(indexDims, 0, value, 0, config.packedIndexBytesLength());
|
||||
random().nextBytes(dataDims);
|
||||
System.arraycopy(
|
||||
dataDims, 0, value, config.packedIndexBytesLength, numDataDims * config.bytesPerDim);
|
||||
dataDims,
|
||||
0,
|
||||
value,
|
||||
config.packedIndexBytesLength(),
|
||||
numDataDims * config.bytesPerDim());
|
||||
points[i] =
|
||||
new Point(
|
||||
value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
|
||||
}
|
||||
for (int i = 0; i < config.numIndexDims; ++i) {
|
||||
commonPrefixLengths[i] = config.bytesPerDim;
|
||||
for (int i = 0; i < config.numIndexDims(); ++i) {
|
||||
commonPrefixLengths[i] = config.bytesPerDim();
|
||||
}
|
||||
for (int i = config.numIndexDims; i < config.numDims; ++i) {
|
||||
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim);
|
||||
for (int i = config.numIndexDims(); i < config.numDims(); ++i) {
|
||||
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim());
|
||||
}
|
||||
BytesRef firstValue = points[0].packedValue;
|
||||
for (int i = 1; i < points.length; ++i) {
|
||||
for (int dim = config.numIndexDims; dim < config.numDims; ++dim) {
|
||||
int offset = dim * config.bytesPerDim;
|
||||
for (int dim = config.numIndexDims(); dim < config.numDims(); ++dim) {
|
||||
int offset = dim * config.bytesPerDim();
|
||||
BytesRef packedValue = points[i].packedValue;
|
||||
System.arraycopy(
|
||||
firstValue.bytes,
|
||||
|
|
|
@ -308,7 +308,7 @@ public class RandomCodec extends AssertingCodec {
|
|||
protected int split(byte[] minPackedValue, byte[] maxPackedValue, int[] parentDims) {
|
||||
// BKD normally defaults by the widest dimension, to try to make as squarish cells as
|
||||
// possible, but we just pick a random one ;)
|
||||
return random.nextInt(config.numIndexDims);
|
||||
return random.nextInt(config.numIndexDims());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue