diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4f872f5b517..a3d9228ca38 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -138,6 +138,9 @@ Optimizations
* LUCENE-8901: Load frequencies lazily only when needed in BlockDocsEnum and
BlockImpactsEverythingEnum (Mayya Sharipova).
+* LUCENE-8888: Optimize distribution of points with data dimensions in
+ BKD tree leaves. (Ignacio Vera)
+
Test Framework
* LUCENE-8825: CheckHits now display the shard index in case of mismatch
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java
index ff6cbaa9488..31ecb122ec5 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java
@@ -597,7 +597,7 @@ final class SimpleTextBKDWriter implements Closeable {
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
//We re-use the selector so we do not need to create an object every time.
- BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
+ BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
boolean success = false;
try {
@@ -605,7 +605,7 @@ final class SimpleTextBKDWriter implements Closeable {
build(1, numLeaves, points, out,
radixSelector, minPackedValue, maxPackedValue,
- splitPackedValues, leafBlockFPs);
+ splitPackedValues, leafBlockFPs, new int[maxPointsInLeafNode]);
// If no exception, we should have cleaned everything up:
@@ -877,7 +877,7 @@ final class SimpleTextBKDWriter implements Closeable {
}
// sort by sortedDim
- MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
+ MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer:
@@ -920,7 +920,7 @@ final class SimpleTextBKDWriter implements Closeable {
break;
}
}
- MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
+ MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value
@@ -951,7 +951,8 @@ final class SimpleTextBKDWriter implements Closeable {
BKDRadixSelector radixSelector,
byte[] minPackedValue, byte[] maxPackedValue,
byte[] splitPackedValues,
- long[] leafBlockFPs) throws IOException {
+ long[] leafBlockFPs,
+ int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) {
@@ -1010,7 +1011,12 @@ final class SimpleTextBKDWriter implements Closeable {
// loading the values:
int count = to - from;
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
- writeLeafBlockDocs(out, heapSource.docIDs, from, count);
+ // Write doc IDs
+ int[] docIDs = spareDocIds;
+ for (int i = 0; i < count; i++) {
+ docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
+ }
+ writeLeafBlockDocs(out, spareDocIds, 0, count);
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
// from the index, much like how terms dict does so from the FST:
@@ -1030,7 +1036,7 @@ final class SimpleTextBKDWriter implements Closeable {
}
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
- heapSource.docIDs, from);
+ docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else {
@@ -1075,12 +1081,12 @@ final class SimpleTextBKDWriter implements Closeable {
// Recurse on left tree:
build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector,
- minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs);
+ minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
// TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
// Recurse on right tree:
build(2*nodeID+1, leafNodeOffset, pathSlices[1], out, radixSelector,
- minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs);
+ minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
}
}
@@ -1132,6 +1138,13 @@ final class SimpleTextBKDWriter implements Closeable {
if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim);
}
+ if (cmp == 0 && numDataDims > numIndexDims) {
+ int dataOffset = numIndexDims * bytesPerDim;
+ cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
+ if (cmp > 0) {
+ throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
+ }
+ }
if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim);
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java
index 6a212d888d7..4ade1040e88 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java
@@ -48,6 +48,8 @@ public final class BKDRadixSelector {
private final int bytesSorted;
//data dimensions size
private final int packedBytesLength;
+ // data dimensions plus docID size
+ private final int packedBytesDocIDLength;
//flag to when we are moving to sort on heap
private final int maxPointsSortInHeap;
//reusable buffer
@@ -60,18 +62,26 @@ public final class BKDRadixSelector {
private final Directory tempDir;
// prefix for temp files
private final String tempFileNamePrefix;
+ // data and index dimensions
+ private final int numDataDims, numIndexDims;
/**
* Sole constructor.
*/
- public BKDRadixSelector(int numDim, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
+ public BKDRadixSelector(int numDataDims, int numIndexDims, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
this.bytesPerDim = bytesPerDim;
- this.packedBytesLength = numDim * bytesPerDim;
- this.bytesSorted = bytesPerDim + Integer.BYTES;
+ this.numDataDims = numDataDims;
+ this.numIndexDims = numIndexDims;
+ this.packedBytesLength = numDataDims * bytesPerDim;
+ this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
+ // Selection and sorting is done in a given dimension. In case the values of the dimension are equal
+ // between two points we tie-break first using the data-only dimensions and if those are still equal
+ // we tie-break on the docID. Here we account for all bytes used in the process.
+ this.bytesSorted = bytesPerDim + (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
this.maxPointsSortInHeap = maxPointsSortInHeap;
- int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / (packedBytesLength + Integer.BYTES);
- this.offlineBuffer = new byte[numberOfPointsOffline * (packedBytesLength + Integer.BYTES)];
+ int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / packedBytesDocIDLength;
+ this.offlineBuffer = new byte[numberOfPointsOffline * packedBytesDocIDLength];
this.partitionBucket = new int[bytesSorted];
this.histogram = new long[HISTOGRAM_SIZE];
this.scratch = new byte[bytesSorted];
@@ -134,12 +144,12 @@ public final class BKDRadixSelector {
assert commonPrefixPosition > dimCommonPrefix;
reader.next();
PointValue pointValue = reader.pointValue();
+ BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
// copy dimension
- BytesRef packedValue = pointValue.packedValue();
- System.arraycopy(packedValue.bytes, packedValue.offset + offset, scratch, 0, bytesPerDim);
- // copy docID
- BytesRef docIDBytes = pointValue.docIDBytes();
- System.arraycopy(docIDBytes.bytes, docIDBytes.offset, scratch, bytesPerDim, Integer.BYTES);
+ System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, bytesPerDim);
+ // copy data dimensions and docID
+ System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + numIndexDims * bytesPerDim, scratch, bytesPerDim, (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES);
+
for (long i = from + 1; i < to; i++) {
reader.next();
pointValue = reader.pointValue();
@@ -157,13 +167,15 @@ public final class BKDRadixSelector {
//check common prefix and adjust histogram
final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix;
final int endIndex = (commonPrefixPosition > bytesPerDim) ? bytesPerDim : commonPrefixPosition;
- packedValue = pointValue.packedValue();
- int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValue.bytes, packedValue.offset + offset + startIndex, packedValue.offset + offset + endIndex);
+ packedValueDocID = pointValue.packedValueDocIDBytes();
+ int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValueDocID.bytes, packedValueDocID.offset + offset + startIndex, packedValueDocID.offset + offset + endIndex);
if (j == -1) {
if (commonPrefixPosition > bytesPerDim) {
- //tie-break on docID
- docIDBytes = pointValue.docIDBytes();
- int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition, docIDBytes.bytes, docIDBytes.offset, docIDBytes.offset + commonPrefixPosition - bytesPerDim);
+ //tie-break on data dimensions + docID
+ final int startTieBreak = numIndexDims * bytesPerDim;
+ final int endTieBreak = startTieBreak + commonPrefixPosition - bytesPerDim;
+ int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition,
+ packedValueDocID.bytes, packedValueDocID.offset + startTieBreak, packedValueDocID.offset + endTieBreak);
if (k != -1) {
commonPrefixPosition = bytesPerDim + k;
Arrays.fill(histogram, 0);
@@ -195,8 +207,8 @@ public final class BKDRadixSelector {
BytesRef packedValue = pointValue.packedValue();
bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff;
} else {
- BytesRef docIDValue = pointValue.docIDBytes();
- bucket = docIDValue.bytes[docIDValue.offset + commonPrefixPosition - bytesPerDim] & 0xff;
+ BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
+ bucket = packedValueDocID.bytes[packedValueDocID.offset + numIndexDims * bytesPerDim + commonPrefixPosition - bytesPerDim] & 0xff;
}
return bucket;
}
@@ -310,10 +322,11 @@ public final class BKDRadixSelector {
return partition;
}
- private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefix) {
- final int offset = dim * bytesPerDim + commonPrefix;
- final int dimCmpBytes = bytesPerDim - commonPrefix;
- new RadixSelector(bytesSorted - commonPrefix) {
+ private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefixLength) {
+ final int dimOffset = dim * bytesPerDim + commonPrefixLength;
+ final int dimCmpBytes = bytesPerDim - commonPrefixLength;
+ final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
+ new RadixSelector(bytesSorted - commonPrefixLength) {
@Override
protected void swap(int i, int j) {
@@ -325,23 +338,23 @@ public final class BKDRadixSelector {
assert k >= 0 : "negative prefix " + k;
if (k < dimCmpBytes) {
// dim bytes
- return points.block[i * packedBytesLength + offset + k] & 0xff;
+ return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
} else {
- // doc id
- int s = 3 - (k - dimCmpBytes);
- return (points.docIDs[i] >>> (s * 8)) & 0xff;
+ // data bytes
+ return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
}
}
@Override
protected Selector getFallbackSelector(int d) {
- int skypedBytes = d + commonPrefix;
- final int start = dim * bytesPerDim + skypedBytes;
- final int end = dim * bytesPerDim + bytesPerDim;
+ final int skypedBytes = d + commonPrefixLength;
+ final int dimStart = dim * bytesPerDim + skypedBytes;
+ final int dimEnd = dim * bytesPerDim + bytesPerDim;
+ final int dataOffset = numIndexDims * bytesPerDim;
+ // data length is composed of the data dimensions plus the docID
+ final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
return new IntroSelector() {
- int pivotDoc = -1;
-
@Override
protected void swap(int i, int j) {
points.swap(i, j);
@@ -350,36 +363,37 @@ public final class BKDRadixSelector {
@Override
protected void setPivot(int i) {
if (skypedBytes < bytesPerDim) {
- System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
+ System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
}
- pivotDoc = points.docIDs[i];
+ System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
}
@Override
protected int compare(int i, int j) {
if (skypedBytes < bytesPerDim) {
- int iOffset = i * packedBytesLength;
- int jOffset = j * packedBytesLength;
- int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end,
- points.block, jOffset + start, jOffset + end);
+ int iOffset = i * packedBytesDocIDLength;
+ int jOffset = j * packedBytesDocIDLength;
+ int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
- return points.docIDs[i] - points.docIDs[j];
+ int iOffset = i * packedBytesDocIDLength + dataOffset;
+ int jOffset = j * packedBytesDocIDLength + dataOffset;
+ return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
}
@Override
protected int comparePivot(int j) {
if (skypedBytes < bytesPerDim) {
- int jOffset = j * packedBytesLength;
- int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim,
- points.block, jOffset + start, jOffset + end);
+ int jOffset = j * packedBytesDocIDLength;
+ int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
- return pivotDoc - points.docIDs[j];
+ int jOffset = j * packedBytesDocIDLength + dataOffset;
+ return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
}
};
}
@@ -394,8 +408,9 @@ public final class BKDRadixSelector {
/** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */
public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) {
- final int offset = dim * bytesPerDim + commonPrefixLength;
+ final int dimOffset = dim * bytesPerDim + commonPrefixLength;
final int dimCmpBytes = bytesPerDim - commonPrefixLength;
+ final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
new MSBRadixSorter(bytesSorted - commonPrefixLength) {
@Override
@@ -403,11 +418,10 @@ public final class BKDRadixSelector {
assert k >= 0 : "negative prefix " + k;
if (k < dimCmpBytes) {
// dim bytes
- return points.block[i * packedBytesLength + offset + k] & 0xff;
+ return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
} else {
- // doc id
- int s = 3 - (k - dimCmpBytes);
- return (points.docIDs[i] >>> (s * 8)) & 0xff;
+ // data bytes
+ return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
}
}
@@ -418,13 +432,14 @@ public final class BKDRadixSelector {
@Override
protected Sorter getFallbackSorter(int k) {
- int skypedBytes = k + commonPrefixLength;
- final int start = dim * bytesPerDim + skypedBytes;
- final int end = dim * bytesPerDim + bytesPerDim;
+ final int skypedBytes = k + commonPrefixLength;
+ final int dimStart = dim * bytesPerDim + skypedBytes;
+ final int dimEnd = dim * bytesPerDim + bytesPerDim;
+ final int dataOffset = numIndexDims * bytesPerDim;
+ // data length is composed of the data dimensions plus the docID
+ final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
return new IntroSorter() {
- int pivotDoc = -1;
-
@Override
protected void swap(int i, int j) {
points.swap(i, j);
@@ -433,36 +448,37 @@ public final class BKDRadixSelector {
@Override
protected void setPivot(int i) {
if (skypedBytes < bytesPerDim) {
- System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
+ System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
}
- pivotDoc = points.docIDs[i];
+ System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
}
@Override
protected int compare(int i, int j) {
if (skypedBytes < bytesPerDim) {
- int iOffset = i * packedBytesLength;
- int jOffset = j * packedBytesLength;
- int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end,
- points.block, jOffset + start, jOffset + end);
+ int iOffset = i * packedBytesDocIDLength;
+ int jOffset = j * packedBytesDocIDLength;
+ int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
- return points.docIDs[i] - points.docIDs[j];
+ int iOffset = i * packedBytesDocIDLength + dataOffset;
+ int jOffset = j * packedBytesDocIDLength + dataOffset;
+ return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
}
@Override
protected int comparePivot(int j) {
if (skypedBytes < bytesPerDim) {
- int jOffset = j * packedBytesLength;
- int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim,
- points.block, jOffset + start, jOffset + end);
+ int jOffset = j * packedBytesDocIDLength;
+ int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
if (cmp != 0) {
return cmp;
}
}
- return pivotDoc - points.docIDs[j];
+ int jOffset = j * packedBytesDocIDLength + dataOffset;
+ return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
}
};
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 03cd90478c5..5512eff29bc 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -776,7 +776,7 @@ public class BKDWriter implements Closeable {
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
//We re-use the selector so we do not need to create an object every time.
- BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
+ BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
boolean success = false;
try {
@@ -787,7 +787,8 @@ public class BKDWriter implements Closeable {
minPackedValue, maxPackedValue,
parentSplits,
splitPackedValues,
- leafBlockFPs);
+ leafBlockFPs,
+ new int[maxPointsInLeafNode]);
assert Arrays.equals(parentSplits, new int[numIndexDims]);
// If no exception, we should have cleaned everything up:
@@ -1366,7 +1367,7 @@ public class BKDWriter implements Closeable {
}
// sort by sortedDim
- MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
+ MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2);
BytesRef comparator = scratchBytesRef1;
@@ -1435,7 +1436,7 @@ public class BKDWriter implements Closeable {
commonPrefixLen = bytesPerDim;
}
- MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
+ MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value
@@ -1472,7 +1473,8 @@ public class BKDWriter implements Closeable {
byte[] minPackedValue, byte[] maxPackedValue,
int[] parentSplits,
byte[] splitPackedValues,
- long[] leafBlockFPs) throws IOException {
+ long[] leafBlockFPs,
+ int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) {
@@ -1532,7 +1534,13 @@ public class BKDWriter implements Closeable {
// loading the values:
int count = to - from;
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
- writeLeafBlockDocs(out, heapSource.docIDs, from, count);
+ assert count <= spareDocIds.length : "count=" + count + " > length=" + spareDocIds.length;
+ // Write doc IDs
+ int[] docIDs = spareDocIds;
+ for (int i = 0; i < count; i++) {
+ docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
+ }
+ writeLeafBlockDocs(out, docIDs, 0, count);
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
// from the index, much like how terms dict does so from the FST:
@@ -1555,7 +1563,7 @@ public class BKDWriter implements Closeable {
}
};
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
- heapSource.docIDs, from);
+ docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
} else {
@@ -1602,12 +1610,12 @@ public class BKDWriter implements Closeable {
// Recurse on left tree:
build(2 * nodeID, leafNodeOffset, slices[0],
out, radixSelector, minPackedValue, maxSplitPackedValue,
- parentSplits, splitPackedValues, leafBlockFPs);
+ parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
// Recurse on right tree:
build(2 * nodeID + 1, leafNodeOffset, slices[1],
out, radixSelector, minSplitPackedValue, maxPackedValue
- , parentSplits, splitPackedValues, leafBlockFPs);
+ , parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
parentSplits[splitDim]--;
}
@@ -1661,6 +1669,13 @@ public class BKDWriter implements Closeable {
if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
}
+ if (cmp == 0 && numDataDims > numIndexDims) {
+ int dataOffset = numIndexDims * bytesPerDim;
+ cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
+ if (cmp > 0) {
+ throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
+ }
+ }
if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
index 735b3138b21..4cee1ef036d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
@@ -27,16 +27,16 @@ public final class HeapPointReader implements PointReader {
private int curRead;
final byte[] block;
final int packedBytesLength;
- final int[] docIDs;
+ final int packedBytesDocIDLength;
final int end;
private final HeapPointValue pointValue;
- public HeapPointReader(byte[] block, int packedBytesLength, int[] docIDs, int start, int end) {
+ public HeapPointReader(byte[] block, int packedBytesLength, int start, int end) {
this.block = block;
- this.docIDs = docIDs;
curRead = start-1;
this.end = end;
this.packedBytesLength = packedBytesLength;
+ this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
if (start < end) {
this.pointValue = new HeapPointValue(block, packedBytesLength);
} else {
@@ -53,7 +53,7 @@ public final class HeapPointReader implements PointReader {
@Override
public PointValue pointValue() {
- pointValue.setValue(curRead * packedBytesLength, docIDs[curRead]);
+ pointValue.setOffset(curRead * packedBytesDocIDLength);
return pointValue;
}
@@ -66,21 +66,22 @@ public final class HeapPointReader implements PointReader {
*/
static class HeapPointValue implements PointValue {
- BytesRef packedValue;
- BytesRef docIDBytes;
- int docID;
+ final BytesRef packedValue;
+ final BytesRef packedValueDocID;
+ final int packedValueLength;
- public HeapPointValue(byte[] value, int packedLength) {
- packedValue = new BytesRef(value, 0, packedLength);
- docIDBytes = new BytesRef(new byte[4]);
+ HeapPointValue(byte[] value, int packedValueLength) {
+ this.packedValueLength = packedValueLength;
+ this.packedValue = new BytesRef(value, 0, packedValueLength);
+ this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
}
/**
- * Sets a new value by changing the offset and docID.
+ * Sets a new value by changing the offset.
*/
- public void setValue(int offset, int docID) {
- this.docID = docID;
+ public void setOffset(int offset) {
packedValue.offset = offset;
+ packedValueDocID.offset = offset;
}
@Override
@@ -90,16 +91,14 @@ public final class HeapPointReader implements PointReader {
@Override
public int docID() {
- return docID;
+ int position = packedValueDocID.offset + packedValueLength;
+ return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
+ | ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
}
@Override
- public BytesRef docIDBytes() {
- docIDBytes.bytes[0] = (byte) (docID >> 24);
- docIDBytes.bytes[1] = (byte) (docID >> 16);
- docIDBytes.bytes[2] = (byte) (docID >> 8);
- docIDBytes.bytes[3] = (byte) (docID >> 0);
- return docIDBytes;
+ public BytesRef packedValueDocIDBytes() {
+ return packedValueDocID;
}
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
index 0893c1917eb..cd1b8ac0190 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
@@ -25,10 +25,10 @@ import org.apache.lucene.util.FutureArrays;
* @lucene.internal
* */
public final class HeapPointWriter implements PointWriter {
- public final int[] docIDs;
public final byte[] block;
final int size;
final int packedBytesLength;
+ final int packedBytesDocIDLength;
private final byte[] scratch;
private int nextWrite;
private boolean closed;
@@ -37,11 +37,11 @@ public final class HeapPointWriter implements PointWriter {
public HeapPointWriter(int size, int packedBytesLength) {
- this.docIDs = new int[size];
- this.block = new byte[packedBytesLength * size];
- this.size = size;
+ this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
this.packedBytesLength = packedBytesLength;
- this.scratch = new byte[packedBytesLength];
+ this.block = new byte[packedBytesDocIDLength * size];
+ this.size = size;
+ this.scratch = new byte[packedBytesDocIDLength];
if (size > 0) {
pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength);
} else {
@@ -53,7 +53,7 @@ public final class HeapPointWriter implements PointWriter {
/** Returns a reference, in result
, to the byte[] slice holding this value */
public PointValue getPackedValueSlice(int index) {
assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index;
- pointValue.setValue(index * packedBytesLength, docIDs[index]);
+ pointValue.setOffset(index * packedBytesDocIDLength);
return pointValue;
}
@@ -62,8 +62,12 @@ public final class HeapPointWriter implements PointWriter {
assert closed == false : "point writer is already closed";
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
- System.arraycopy(packedValue, 0, block, nextWrite * packedBytesLength, packedBytesLength);
- docIDs[nextWrite] = docID;
+ System.arraycopy(packedValue, 0, block, nextWrite * packedBytesDocIDLength, packedBytesLength);
+ int position = nextWrite * packedBytesDocIDLength + packedBytesLength;
+ block[position] = (byte) (docID >> 24);
+ block[++position] = (byte) (docID >> 16);
+ block[++position] = (byte) (docID >> 8);
+ block[++position] = (byte) (docID >> 0);
nextWrite++;
}
@@ -71,27 +75,23 @@ public final class HeapPointWriter implements PointWriter {
public void append(PointValue pointValue) {
assert closed == false : "point writer is already closed";
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
- BytesRef packedValue = pointValue.packedValue();
- assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + (packedBytesLength) + "] but was [" + packedValue.length + "]";
- System.arraycopy(packedValue.bytes, packedValue.offset, block, nextWrite * packedBytesLength, packedBytesLength);
- docIDs[nextWrite] = pointValue.docID();
+ BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
+ assert packedValueDocID.length == packedBytesDocIDLength : "[packedValueDocID] must have length [" + (packedBytesDocIDLength) + "] but was [" + packedValueDocID.length + "]";
+ System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset, block, nextWrite * packedBytesDocIDLength, packedBytesDocIDLength);
nextWrite++;
}
public void swap(int i, int j) {
- int docID = docIDs[i];
- docIDs[i] = docIDs[j];
- docIDs[j] = docID;
- int indexI = i * packedBytesLength;
- int indexJ = j * packedBytesLength;
+ int indexI = i * packedBytesDocIDLength;
+ int indexJ = j * packedBytesDocIDLength;
// scratch1 = values[i]
- System.arraycopy(block, indexI, scratch, 0, packedBytesLength);
+ System.arraycopy(block, indexI, scratch, 0, packedBytesDocIDLength);
// values[i] = values[j]
- System.arraycopy(block, indexJ, block, indexI, packedBytesLength);
+ System.arraycopy(block, indexJ, block, indexI, packedBytesDocIDLength);
// values[j] = scratch1
- System.arraycopy(scratch, 0, block, indexJ, packedBytesLength);
+ System.arraycopy(scratch, 0, block, indexJ, packedBytesDocIDLength);
}
public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) {
@@ -101,8 +101,8 @@ public final class HeapPointWriter implements PointWriter {
for (int dim = 0; dim < numDataDims; dim++) {
final int start = dim * bytesPerDim + commonPrefixLengths[dim];
final int end = dim * bytesPerDim + bytesPerDim;
- if (FutureArrays.mismatch(block, i * packedBytesLength + start, i * packedBytesLength + end,
- block, (i - 1) * packedBytesLength + start, (i - 1) * packedBytesLength + end) != -1) {
+ if (FutureArrays.mismatch(block, i * packedBytesDocIDLength + start, i * packedBytesDocIDLength + end,
+ block, (i - 1) * packedBytesDocIDLength + start, (i - 1) * packedBytesDocIDLength + end) != -1) {
leafCardinality++;
break;
}
@@ -119,9 +119,9 @@ public final class HeapPointWriter implements PointWriter {
@Override
public PointReader getReader(long start, long length) {
assert closed : "point writer is still open and trying to get a reader";
- assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length;
+ assert start + length <= size: "start=" + start + " length=" + length + " size=" + size;
assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite;
- return new HeapPointReader(block, packedBytesLength, docIDs, (int) start, Math.toIntExact(start+length));
+ return new HeapPointReader(block, packedBytesLength, (int) start, Math.toIntExact(start+length));
}
@Override
@@ -135,6 +135,6 @@ public final class HeapPointWriter implements PointWriter {
@Override
public String toString() {
- return "HeapPointWriter(count=" + nextWrite + " size=" + docIDs.length + ")";
+ return "HeapPointWriter(count=" + nextWrite + " size=" + size + ")";
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
index c56ed71df4d..45e695ba7e1 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
@@ -77,7 +77,8 @@ public final class MutablePointsReaderUtils {
protected int comparePivot(int j) {
if (k < packedBytesLength) {
reader.getValue(j, scratch);
- int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k, scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
+ int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k,
+ scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
if (cmp != 0) {
return cmp;
}
@@ -91,14 +92,16 @@ public final class MutablePointsReaderUtils {
}
/** Sort points on the given dimension. */
- public static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
+ public static void sortByDim(int numDataDim, int numIndexDim, int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
MutablePointValues reader, int from, int to,
BytesRef scratch1, BytesRef scratch2) {
+ final int start = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
+ final int dimEnd = sortedDim * bytesPerDim + bytesPerDim;
+ final int dataStart = numIndexDim * bytesPerDim;
+ final int dataEnd = dataStart + (numDataDim - numIndexDim) * bytesPerDim;
// No need for a fancy radix sort here, this is called on the leaves only so
// there are not many values to sort
- final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
- final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
new IntroSorter() {
final BytesRef pivot = scratch1;
@@ -118,9 +121,14 @@ public final class MutablePointsReaderUtils {
@Override
protected int comparePivot(int j) {
reader.getValue(j, scratch2);
- int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset, pivot.offset + offset + numBytesToCompare, scratch2.bytes, scratch2.offset + offset, scratch2.offset + offset + numBytesToCompare);
+ int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + start, pivot.offset + dimEnd, scratch2.bytes,
+ scratch2.offset + start, scratch2.offset + dimEnd);
if (cmp == 0) {
- cmp = pivotDoc - reader.getDocID(j);
+ cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
+ scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
+ if (cmp == 0) {
+ cmp = pivotDoc - reader.getDocID(j);
+ }
}
return cmp;
}
@@ -130,16 +138,20 @@ public final class MutablePointsReaderUtils {
/** Partition points around {@code mid}. All values on the left must be less
* than or equal to it and all values on the right must be greater than or
* equal to it. */
- public static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
+ public static void partition(int numDataDim, int numIndexDim, int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
MutablePointValues reader, int from, int to, int mid,
BytesRef scratch1, BytesRef scratch2) {
- final int offset = splitDim * bytesPerDim + commonPrefixLen;
- final int cmpBytes = bytesPerDim - commonPrefixLen;
+ final int dimOffset = splitDim * bytesPerDim + commonPrefixLen;
+ final int dimCmpBytes = bytesPerDim - commonPrefixLen;
+ final int dataOffset = numIndexDim * bytesPerDim;
+ final int dataCmpBytes = (numDataDim - numIndexDim) * bytesPerDim + dimCmpBytes;
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
- new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {
+ new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) {
@Override
protected Selector getFallbackSelector(int k) {
+ final int dataStart = (k < dimCmpBytes) ? dataOffset : dataOffset + k - dimCmpBytes;
+ final int dataEnd = numDataDim * bytesPerDim;
return new IntroSelector() {
final BytesRef pivot = scratch1;
@@ -158,9 +170,18 @@ public final class MutablePointsReaderUtils {
@Override
protected int comparePivot(int j) {
- if (k < cmpBytes) {
+ if (k < dimCmpBytes) {
reader.getValue(j, scratch2);
- int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset + k, pivot.offset + offset + k + cmpBytes - k, scratch2.bytes, scratch2.offset + offset + k, scratch2.offset + offset + k + cmpBytes - k);
+ int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dimOffset + k, pivot.offset + dimOffset + dimCmpBytes,
+ scratch2.bytes, scratch2.offset + dimOffset + k, scratch2.offset + dimOffset + dimCmpBytes);
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+ if (k < dataCmpBytes) {
+ reader.getValue(j, scratch2);
+ int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
+ scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
if (cmp != 0) {
return cmp;
}
@@ -177,10 +198,12 @@ public final class MutablePointsReaderUtils {
@Override
protected int byteAt(int i, int k) {
- if (k < cmpBytes) {
- return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
+ if (k < dimCmpBytes) {
+ return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k));
+ } else if (k < dataCmpBytes) {
+ return Byte.toUnsignedInt(reader.getByteAt(i, dataOffset + k - dimCmpBytes));
} else {
- final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
+ final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index 6218429b071..030bd719fea 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -137,12 +137,14 @@ public final class OfflinePointReader implements PointReader {
*/
static class OfflinePointValue implements PointValue {
- BytesRef packedValue;
- BytesRef docIDBytes;
+ final BytesRef packedValue;
+ final BytesRef packedValueDocID;
+ final int packedValueLength;
OfflinePointValue(byte[] value, int packedValueLength) {
- packedValue = new BytesRef(value, 0, packedValueLength);
- docIDBytes = new BytesRef(value, packedValueLength, Integer.BYTES);
+ this.packedValueLength = packedValueLength;
+ this.packedValue = new BytesRef(value, 0, packedValueLength);
+ this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
}
/**
@@ -150,7 +152,7 @@ public final class OfflinePointReader implements PointReader {
*/
public void setOffset(int offset) {
packedValue.offset = offset;
- docIDBytes.offset = offset + packedValue.length;
+ packedValueDocID.offset = offset;
}
@Override
@@ -160,14 +162,14 @@ public final class OfflinePointReader implements PointReader {
@Override
public int docID() {
- int position =docIDBytes.offset;
- return ((docIDBytes.bytes[position] & 0xFF) << 24) | ((docIDBytes.bytes[++position] & 0xFF) << 16)
- | ((docIDBytes.bytes[++position] & 0xFF) << 8) | (docIDBytes.bytes[++position] & 0xFF);
+ int position = packedValueDocID.offset + packedValueLength;
+ return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
+ | ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
}
@Override
- public BytesRef docIDBytes() {
- return docIDBytes;
+ public BytesRef packedValueDocIDBytes() {
+ return packedValueDocID;
}
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index 4f4ce9ee740..d9fa86f5307 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -62,12 +62,9 @@ public final class OfflinePointWriter implements PointWriter {
@Override
public void append(PointValue pointValue) throws IOException {
assert closed == false : "Point writer is already closed";
- BytesRef packedValue = pointValue.packedValue();
- assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
- out.writeBytes(packedValue.bytes, packedValue.offset, packedValue.length);
- BytesRef docIDBytes = pointValue.docIDBytes();
- assert docIDBytes.length == Integer.BYTES : "[docIDBytes] must have length [" + Integer.BYTES + "] but was [" + docIDBytes.length + "]";
- out.writeBytes(docIDBytes.bytes, docIDBytes.offset, docIDBytes.length);
+ BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
+ assert packedValueDocID.length == packedBytesLength + Integer.BYTES : "[packedValue and docID] must have length [" + (packedBytesLength + Integer.BYTES) + "] but was [" + packedValueDocID.length + "]";
+ out.writeBytes(packedValueDocID.bytes, packedValueDocID.offset, packedValueDocID.length);
count++;
assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count;
}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java
index 28506278782..d933e81ff61 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointValue.java
@@ -28,10 +28,11 @@ public interface PointValue {
/** Returns the packed values for the dimensions */
BytesRef packedValue();
- /** Returns the document id */
+ /** Returns the docID */
int docID();
- /** Returns the byte representation of the document id */
- BytesRef docIDBytes();
+ /** Returns the byte representation of the packed value
+ * together with the docID */
+ BytesRef packedValueDocIDBytes();
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index 2b3cd8a6f77..7c537a4758d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -1098,6 +1098,80 @@ public class TestBKD extends LuceneTestCase {
}
}
+ public void testCheckDataDimOptimalOrder() throws IOException {
+ Directory dir = newDirectory();
+ final int numValues = atLeast(5000);
+ final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
+ final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
+ final double maxMB = 3.0 + (3 * random().nextDouble());
+
+ final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
+ final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
+
+ final byte[] pointValue1 = new byte[numDataDims * numBytesPerDim];
+ final byte[] pointValue2 = new byte[numDataDims * numBytesPerDim];
+ random().nextBytes(pointValue1);
+ random().nextBytes(pointValue2);
+ // equal index dimensions but different data dimensions
+ for (int i = 0; i < numIndexDims; i++) {
+ System.arraycopy(pointValue1, i * numBytesPerDim, pointValue2, i * numBytesPerDim, numBytesPerDim);
+ }
+
+ BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode,
+ maxMB, 2 * numValues);
+ for (int i = 0; i < numValues; ++i) {
+ w.add(pointValue1, i);
+ w.add(pointValue2, i);
+ }
+ final long indexFP;
+ try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+ indexFP = w.finish(out);
+ w.close();
+ }
+
+ IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
+ pointsIn.seek(indexFP);
+ BKDReader points = new BKDReader(pointsIn);
+
+ points.intersect(new IntersectVisitor() {
+
+ byte[] previous = null;
+ boolean hasChanged = false;
+
+ @Override
+ public void visit(int docID) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public void visit(int docID, byte[] packedValue) {
+ if (previous == null) {
+ previous = new byte[numDataDims * numBytesPerDim];
+ System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
+ } else {
+ int mismatch = FutureArrays.mismatch(packedValue, 0, numDataDims * numBytesPerDim, previous, 0, numDataDims * numBytesPerDim);
+ if (mismatch != -1) {
+ if (hasChanged == false) {
+ hasChanged = true;
+ System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
+ } else {
+ fail("Points are not in optimal order");
+ }
+ }
+ }
+ }
+
+ @Override
+ public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+ return Relation.CELL_CROSSES_QUERY;
+ }
+ });
+
+
+ pointsIn.close();
+ dir.close();
+ }
+
public void test2DLongOrdsOffline() throws Exception {
try (Directory dir = newDirectory()) {
int numDocs = 100000;
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java
index 456667339a7..939991c138b 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSelector.java
@@ -209,7 +209,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
}
private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
- BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
+ BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
//we only split by indexed dimension so we check for each only those dimension
for (int splitDim = 0; splitDim < indexDimensions; splitDim++) {
//We need to make a copy of the data as it is deleted in the process
@@ -225,9 +225,15 @@ public class TestBKDRadixSelector extends LuceneTestCase {
int cmp = FutureArrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions);
assertTrue(cmp <= 0);
if (cmp == 0) {
- int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint);
- int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint);
- assertTrue(minDocId >= maxDocID);
+ byte[] maxDataDim = getMaxDataDimension(slices[0], bytesPerDimensions, dataDimensions, indexDimensions, max, splitDim);
+ byte[] minDataDim = getMinDataDimension(slices[1], bytesPerDimensions, dataDimensions, indexDimensions, min, splitDim);
+ cmp = FutureArrays.compareUnsigned(maxDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions, minDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions);
+ assertTrue(cmp <= 0);
+ if (cmp == 0) {
+ int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions, maxDataDim);
+ int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions, minDataDim);
+ assertTrue(minDocId >= maxDocID);
+ }
}
assertTrue(Arrays.equals(partitionPoint, min));
slices[0].writer.destroy();
@@ -293,14 +299,17 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return min;
}
- private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException {
+ private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
int docID = Integer.MAX_VALUE;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
int offset = dimension * bytesPerDimension;
- if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) {
+ int dataOffset = indexDims * bytesPerDimension;
+ int dataLength = (dataDims - indexDims) * bytesPerDimension;
+ if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
+ && FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
int newDocID = pointValue.docID();
if (newDocID < docID) {
docID = newDocID;
@@ -311,6 +320,26 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return docID;
}
+ private byte[] getMinDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] minDim, int splitDim) throws IOException {
+ byte[] min = new byte[(dataDims - indexDims) * bytesPerDimension];
+ Arrays.fill(min, (byte) 0xff);
+ int offset = splitDim * bytesPerDimension;
+ try (PointReader reader = p.writer.getReader(p.start, p.count)) {
+ byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
+ while (reader.next()) {
+ PointValue pointValue = reader.pointValue();
+ BytesRef packedValue = pointValue.packedValue();
+ if (FutureArrays.mismatch(minDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
+ System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
+ if (FutureArrays.compareUnsigned(min, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) > 0) {
+ System.arraycopy(value, 0, min, 0, (dataDims - indexDims) * bytesPerDimension);
+ }
+ }
+ }
+ }
+ return min;
+ }
+
private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException {
byte[] max = new byte[bytesPerDimension];
Arrays.fill(max, (byte) 0);
@@ -328,14 +357,37 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return max;
}
- private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException {
+ private byte[] getMaxDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] maxDim, int splitDim) throws IOException {
+ byte[] max = new byte[(dataDims - indexDims) * bytesPerDimension];
+ Arrays.fill(max, (byte) 0);
+ int offset = splitDim * bytesPerDimension;
+ try (PointReader reader = p.writer.getReader(p.start, p.count)) {
+ byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
+ while (reader.next()) {
+ PointValue pointValue = reader.pointValue();
+ BytesRef packedValue = pointValue.packedValue();
+ if (FutureArrays.mismatch(maxDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
+ System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
+ if (FutureArrays.compareUnsigned(max, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) < 0) {
+ System.arraycopy(value, 0, max, 0, (dataDims - indexDims) * bytesPerDimension);
+ }
+ }
+ }
+ }
+ return max;
+ }
+
+ private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
int docID = Integer.MIN_VALUE;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
int offset = dimension * bytesPerDimension;
- if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) {
+ int dataOffset = indexDims * bytesPerDimension;
+ int dataLength = (dataDims - indexDims) * bytesPerDimension;
+ if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
+ && FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
int newDocID = pointValue.docID();
if (newDocID > docID) {
docID = newDocID;
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
index 045f223b5a1..7fecca79cdb 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDRadixSort.java
@@ -117,7 +117,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{
int packedBytesLength = dataDimensions * bytesPerDim;
Directory dir = newDirectory();
- BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDim, 1000, dir, "test");
+ BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDim, 1000, dir, "test");
// we check for each dimension
for (int splitDim = 0; splitDim < dataDimensions; splitDim++) {
radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim));
@@ -130,6 +130,11 @@ public class TestBKDRadixSort extends LuceneTestCase {
BytesRef value = pointValue.packedValue();
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
assertTrue(cmp >= 0);
+ if (cmp == 0) {
+ int dataOffset = indexDimensions * bytesPerDim;
+ cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataOffset, value.offset + packedBytesLength, previous, dataOffset, packedBytesLength);
+ assertTrue(cmp >= 0);
+ }
if (cmp == 0) {
assertTrue(pointValue.docID() >= previousDocId);
}
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java
index c2a516345bc..d3b105d12c5 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java
@@ -38,7 +38,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
private void doTestSort() {
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
- Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
+ Point[] points = createRandomPoints(1, 1, bytesPerDim, maxDoc, new int[1]);
DummyPointsReader reader = new DummyPointsReader(points);
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
Arrays.sort(points, new Comparator() {
@@ -62,25 +62,15 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
private void doTestSortByDim() {
- final int numDims = TestUtil.nextInt(random(), 1, 8);
+ final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
+ final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
- Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
- int[] commonPrefixLengths = new int[numDims];
- for (int i = 0; i < commonPrefixLengths.length; ++i) {
- commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
- }
- BytesRef firstValue = points[0].packedValue;
- for (int i = 1; i < points.length; ++i) {
- for (int dim = 0; dim < numDims; ++dim) {
- int offset = dim * bytesPerDim;
- BytesRef packedValue = points[i].packedValue;
- System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
- }
- }
+ int[] commonPrefixLengths = new int[numDataDims];
+ Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
DummyPointsReader reader = new DummyPointsReader(points);
- final int sortedDim = random().nextInt(numDims);
- MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
+ final int sortedDim = random().nextInt(numIndexDims);
+ MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
new BytesRef(), new BytesRef());
for (int i = 1; i < points.length; ++i) {
final int offset = sortedDim * bytesPerDim;
@@ -88,7 +78,13 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
BytesRef currentValue = reader.points[i].packedValue;
int cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim);
if (cmp == 0) {
- cmp = reader.points[i - 1].doc - reader.points[i].doc;
+ int dataDimOffset = numIndexDims * bytesPerDim;
+ int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
+ cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + dataDimOffset, previousValue.offset + dataDimOffset + dataDimsLength,
+ currentValue.bytes, currentValue.offset + dataDimOffset, currentValue.offset + dataDimOffset + dataDimsLength);
+ if (cmp == 0) {
+ cmp = reader.points[i - 1].doc - reader.points[i].doc;
+ }
}
assertTrue(cmp <= 0);
}
@@ -101,29 +97,31 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
private void doTestPartition() {
- final int numDims = TestUtil.nextInt(random(), 1, 8);
+ final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
+ final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
+ int[] commonPrefixLengths = new int[numDataDims];
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
- Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
- int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
- final int splitDim = random().nextInt(numDims);
- BytesRef firstValue = points[0].packedValue;
- for (int i = 1; i < points.length; ++i) {
- BytesRef packedValue = points[i].packedValue;
- int offset = splitDim * bytesPerDim;
- System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
- }
+ Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
+ final int splitDim = random().nextInt(numIndexDims);
DummyPointsReader reader = new DummyPointsReader(points);
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
- MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
+ MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLengths[splitDim], reader, 0, points.length, pivot,
new BytesRef(), new BytesRef());
BytesRef pivotValue = reader.points[pivot].packedValue;
int offset = splitDim * bytesPerDim;
for (int i = 0; i < points.length; ++i) {
BytesRef value = reader.points[i].packedValue;
- int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim, pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
+ int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim,
+ pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
if (cmp == 0) {
- cmp = reader.points[i].doc - reader.points[pivot].doc;
+ int dataDimOffset = numIndexDims * bytesPerDim;
+ int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
+ cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataDimOffset, value.offset + dataDimOffset + dataDimsLength,
+ pivotValue.bytes, pivotValue.offset + dataDimOffset, pivotValue.offset + dataDimOffset + dataDimsLength);
+ if (cmp == 0) {
+ cmp = reader.points[i].doc - reader.points[pivot].doc;
+ }
}
if (i < pivot) {
assertTrue(cmp <= 0);
@@ -135,14 +133,54 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}
}
- private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
- final int packedBytesLength = numDims * bytesPerDim;
+ private static Point[] createRandomPoints(int numDataDims, int numIndexdims, int bytesPerDim, int maxDoc, int[] commonPrefixLengths) {
+ assertTrue(commonPrefixLengths.length == numDataDims);
+ final int packedBytesLength = numDataDims * bytesPerDim;
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
Point[] points = new Point[numPoints];
- for (int i = 0; i < numPoints; ++i) {
- byte[] value = new byte[packedBytesLength];
- random().nextBytes(value);
- points[i] = new Point(value, random().nextInt(maxDoc));
+ if (random().nextInt(5) != 0) {
+ for (int i = 0; i < numPoints; ++i) {
+ byte[] value = new byte[packedBytesLength];
+ random().nextBytes(value);
+ points[i] = new Point(value, random().nextInt(maxDoc));
+ }
+ for (int i = 0; i < numDataDims; ++i) {
+ commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
+ }
+ BytesRef firstValue = points[0].packedValue;
+ for (int i = 1; i < points.length; ++i) {
+ for (int dim = 0; dim < numDataDims; ++dim) {
+ int offset = dim * bytesPerDim;
+ BytesRef packedValue = points[i].packedValue;
+ System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
+ }
+ }
+ } else {
+      //index dims are equal, data dims are different
+ byte[] indexDims = new byte[numIndexdims * bytesPerDim];
+ random().nextBytes(indexDims);
+ byte[] dataDims = new byte[(numDataDims - numIndexdims) * bytesPerDim];
+ for (int i = 0; i < numPoints; ++i) {
+ byte[] value = new byte[packedBytesLength];
+ System.arraycopy(indexDims, 0, value, 0, numIndexdims * bytesPerDim);
+ random().nextBytes(dataDims);
+ System.arraycopy(dataDims, 0, value, numIndexdims * bytesPerDim, (numDataDims - numIndexdims) * bytesPerDim);
+ points[i] = new Point(value, random().nextInt(maxDoc));
+ }
+ for (int i = 0; i < numIndexdims; ++i) {
+ commonPrefixLengths[i] = bytesPerDim;
+ }
+      for (int i = numIndexdims; i < numDataDims; ++i) {
+ commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
+ }
+ BytesRef firstValue = points[0].packedValue;
+ for (int i = 1; i < points.length; ++i) {
+ for (int dim = numIndexdims; dim < numDataDims; ++dim) {
+ int offset = dim * bytesPerDim;
+ BytesRef packedValue = points[i].packedValue;
+ System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
+ }
+ }
}
return points;
}