mirror of https://github.com/apache/lucene.git
LUCENE-8888: Improve distribution of points with data dimensions in BKD tree leaves (#747)
This commit is contained in:
parent 792871c480
commit ef64f7af3e
@@ -138,6 +138,9 @@ Optimizations
 * LUCENE-8901: Load frequencies lazily only when needed in BlockDocsEnum and
   BlockImpactsEverythingEnum (Mayya Sharipova).

+* LUCENE-8888: Optimize distribution of points with data dimensions in
+  BKD tree leaves. (Ignacio Vera)
+
 Test Framework

 * LUCENE-8825: CheckHits now display the shard index in case of mismatch
@@ -597,7 +597,7 @@ final class SimpleTextBKDWriter implements Closeable {
     assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;

     //We re-use the selector so we do not need to create an object every time.
-    BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
+    BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);

     boolean success = false;
     try {
@@ -605,7 +605,7 @@ final class SimpleTextBKDWriter implements Closeable {

       build(1, numLeaves, points, out,
             radixSelector, minPackedValue, maxPackedValue,
-            splitPackedValues, leafBlockFPs);
+            splitPackedValues, leafBlockFPs, new int[maxPointsInLeafNode]);


       // If no exception, we should have cleaned everything up:
@@ -877,7 +877,7 @@ final class SimpleTextBKDWriter implements Closeable {
      }

      // sort by sortedDim
-     MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
+     MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
          reader, from, to, scratchBytesRef1, scratchBytesRef2);

      // Save the block file pointer:
@@ -920,7 +920,7 @@ final class SimpleTextBKDWriter implements Closeable {
          break;
        }
      }
-     MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
+     MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
          reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);

      // set the split value
@@ -951,7 +951,8 @@ final class SimpleTextBKDWriter implements Closeable {
                     BKDRadixSelector radixSelector,
                     byte[] minPackedValue, byte[] maxPackedValue,
                     byte[] splitPackedValues,
-                    long[] leafBlockFPs) throws IOException {
+                    long[] leafBlockFPs,
+                    int[] spareDocIds) throws IOException {

    if (nodeID >= leafNodeOffset) {

@@ -1010,7 +1011,12 @@ final class SimpleTextBKDWriter implements Closeable {
      // loading the values:
      int count = to - from;
      assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
-     writeLeafBlockDocs(out, heapSource.docIDs, from, count);
+     // Write doc IDs
+     int[] docIDs = spareDocIds;
+     for (int i = 0; i < count; i++) {
+       docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
+     }
+     writeLeafBlockDocs(out, spareDocIds, 0, count);

      // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
      // from the index, much like how terms dict does so from the FST:
@@ -1030,7 +1036,7 @@ final class SimpleTextBKDWriter implements Closeable {
        }
      };
      assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
-         heapSource.docIDs, from);
+         docIDs, 0);
      writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);

    } else {
@@ -1075,12 +1081,12 @@ final class SimpleTextBKDWriter implements Closeable {

      // Recurse on left tree:
      build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector,
-         minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs);
+         minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);

      // TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
      // Recurse on right tree:
      build(2*nodeID+1, leafNodeOffset, pathSlices[1], out, radixSelector,
-         minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs);
+         minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
    }
  }

@@ -1132,6 +1138,13 @@ final class SimpleTextBKDWriter implements Closeable {
    if (cmp > 0) {
      throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim);
    }
+   if (cmp == 0 && numDataDims > numIndexDims) {
+     int dataOffset = numIndexDims * bytesPerDim;
+     cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
+     if (cmp > 0) {
+       throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
+     }
+   }
    if (cmp == 0 && doc < lastDoc) {
      throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim);
    }
@@ -48,6 +48,8 @@ public final class BKDRadixSelector {
  private final int bytesSorted;
  //data dimensions size
  private final int packedBytesLength;
+ // data dimensions plus docID size
+ private final int packedBytesDocIDLength;
  //flag to when we are moving to sort on heap
  private final int maxPointsSortInHeap;
  //reusable buffer
@@ -60,18 +62,26 @@ public final class BKDRadixSelector {
  private final Directory tempDir;
  // prefix for temp files
  private final String tempFileNamePrefix;
+ // data and index dimensions
+ private final int numDataDims, numIndexDims;

  /**
   * Sole constructor.
   */
- public BKDRadixSelector(int numDim, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
+ public BKDRadixSelector(int numDataDims, int numIndexDims, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
    this.bytesPerDim = bytesPerDim;
-   this.packedBytesLength = numDim * bytesPerDim;
-   this.bytesSorted = bytesPerDim + Integer.BYTES;
+   this.numDataDims = numDataDims;
+   this.numIndexDims = numIndexDims;
+   this.packedBytesLength = numDataDims * bytesPerDim;
+   this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
+   // Selection and sorting is done in a given dimension. In case the values of the dimension are equal
+   // between two points we tie-break first using the data-only dimensions and if those are still equal
+   // we tie-break on the docID. Here we account for all bytes used in the process.
+   this.bytesSorted = bytesPerDim + (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
    this.maxPointsSortInHeap = maxPointsSortInHeap;
-   int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / (packedBytesLength + Integer.BYTES);
-   this.offlineBuffer = new byte[numberOfPointsOffline * (packedBytesLength + Integer.BYTES)];
+   int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / packedBytesDocIDLength;
+   this.offlineBuffer = new byte[numberOfPointsOffline * packedBytesDocIDLength];
    this.partitionBucket = new int[bytesSorted];
    this.histogram = new long[HISTOGRAM_SIZE];
    this.scratch = new byte[bytesSorted];
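The new bytesSorted counts every byte that can participate in an ordering decision: the split dimension itself, then the data-only dimensions, then the docID. A minimal standalone sketch of that arithmetic (the helper below is illustrative, not part of the commit):

  // Sketch: number of bytes a radix pass may need to inspect per point,
  // assuming numDataDims >= numIndexDims (as enforced by the BKD writer).
  static int bytesSorted(int numDataDims, int numIndexDims, int bytesPerDim) {
    int splitDimBytes = bytesPerDim;                                // dimension being selected on
    int dataOnlyBytes = (numDataDims - numIndexDims) * bytesPerDim; // first tie-break
    int docIdBytes = Integer.BYTES;                                 // final tie-break
    return splitDimBytes + dataOnlyBytes + docIdBytes;
  }
  // e.g. numDataDims=4, numIndexDims=2, bytesPerDim=4 -> 4 + 8 + 4 = 16 bytes per key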
@@ -134,12 +144,12 @@ public final class BKDRadixSelector {
      assert commonPrefixPosition > dimCommonPrefix;
      reader.next();
      PointValue pointValue = reader.pointValue();
+     BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
      // copy dimension
-     BytesRef packedValue = pointValue.packedValue();
-     System.arraycopy(packedValue.bytes, packedValue.offset + offset, scratch, 0, bytesPerDim);
-     // copy docID
-     BytesRef docIDBytes = pointValue.docIDBytes();
-     System.arraycopy(docIDBytes.bytes, docIDBytes.offset, scratch, bytesPerDim, Integer.BYTES);
+     System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, bytesPerDim);
+     // copy data dimensions and docID
+     System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + numIndexDims * bytesPerDim, scratch, bytesPerDim, (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES);

      for (long i = from + 1; i < to; i++) {
        reader.next();
        pointValue = reader.pointValue();
@@ -157,13 +167,15 @@ public final class BKDRadixSelector {
        //check common prefix and adjust histogram
        final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix;
        final int endIndex = (commonPrefixPosition > bytesPerDim) ? bytesPerDim : commonPrefixPosition;
-       packedValue = pointValue.packedValue();
-       int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValue.bytes, packedValue.offset + offset + startIndex, packedValue.offset + offset + endIndex);
+       packedValueDocID = pointValue.packedValueDocIDBytes();
+       int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValueDocID.bytes, packedValueDocID.offset + offset + startIndex, packedValueDocID.offset + offset + endIndex);
        if (j == -1) {
          if (commonPrefixPosition > bytesPerDim) {
-           //tie-break on docID
-           docIDBytes = pointValue.docIDBytes();
-           int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition, docIDBytes.bytes, docIDBytes.offset, docIDBytes.offset + commonPrefixPosition - bytesPerDim);
+           //tie-break on data dimensions + docID
+           final int startTieBreak = numIndexDims * bytesPerDim;
+           final int endTieBreak = startTieBreak + commonPrefixPosition - bytesPerDim;
+           int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition,
+               packedValueDocID.bytes, packedValueDocID.offset + startTieBreak, packedValueDocID.offset + endTieBreak);
            if (k != -1) {
              commonPrefixPosition = bytesPerDim + k;
              Arrays.fill(histogram, 0);
@@ -195,8 +207,8 @@ public final class BKDRadixSelector {
      BytesRef packedValue = pointValue.packedValue();
      bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff;
    } else {
-     BytesRef docIDValue = pointValue.docIDBytes();
-     bucket = docIDValue.bytes[docIDValue.offset + commonPrefixPosition - bytesPerDim] & 0xff;
+     BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
+     bucket = packedValueDocID.bytes[packedValueDocID.offset + numIndexDims * bytesPerDim + commonPrefixPosition - bytesPerDim] & 0xff;
    }
    return bucket;
  }
@@ -310,10 +322,11 @@ public final class BKDRadixSelector {
    return partition;
  }

- private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefix) {
-   final int offset = dim * bytesPerDim + commonPrefix;
-   final int dimCmpBytes = bytesPerDim - commonPrefix;
-   new RadixSelector(bytesSorted - commonPrefix) {
+ private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefixLength) {
+   final int dimOffset = dim * bytesPerDim + commonPrefixLength;
+   final int dimCmpBytes = bytesPerDim - commonPrefixLength;
+   final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
+   new RadixSelector(bytesSorted - commonPrefixLength) {

      @Override
      protected void swap(int i, int j) {
@@ -325,23 +338,23 @@ public final class BKDRadixSelector {
        assert k >= 0 : "negative prefix " + k;
        if (k < dimCmpBytes) {
          // dim bytes
-         return points.block[i * packedBytesLength + offset + k] & 0xff;
+         return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
        } else {
-         // doc id
-         int s = 3 - (k - dimCmpBytes);
-         return (points.docIDs[i] >>> (s * 8)) & 0xff;
+         // data bytes
+         return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
        }
      }

      @Override
      protected Selector getFallbackSelector(int d) {
-       int skypedBytes = d + commonPrefix;
-       final int start = dim * bytesPerDim + skypedBytes;
-       final int end = dim * bytesPerDim + bytesPerDim;
+       final int skypedBytes = d + commonPrefixLength;
+       final int dimStart = dim * bytesPerDim + skypedBytes;
+       final int dimEnd = dim * bytesPerDim + bytesPerDim;
+       final int dataOffset = numIndexDims * bytesPerDim;
+       // data length is composed of the data dimensions plus the docID
+       final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
        return new IntroSelector() {

          int pivotDoc = -1;

          @Override
          protected void swap(int i, int j) {
            points.swap(i, j);
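Both byteAt implementations above read from one composite key per point: the remaining split-dimension bytes first, then the data-only dimensions plus docID, all contiguous in points.block now that each entry is packedBytesDocIDLength wide. Note that dataOffset is pre-shifted by -dimCmpBytes so that dataOffset + k lands on the first data-only byte exactly when k == dimCmpBytes. A hedged standalone sketch of that lookup (the helper and parameter names are illustrative, not from the commit):

  // Sketch: byte k of the composite radix key for one heap entry, assuming the
  // block layout [indexDims | dataOnlyDims | docID] introduced by this commit.
  static int keyByteAt(byte[] block, int entry, int entryLen, int dimOffset,
                       int dimCmpBytes, int dataOffset, int k) {
    if (k < dimCmpBytes) {
      return block[entry * entryLen + dimOffset + k] & 0xff;  // split-dim suffix bytes
    } else {
      return block[entry * entryLen + dataOffset + k] & 0xff; // data dims + docID bytes
    }
  }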
@@ -350,36 +363,37 @@ public final class BKDRadixSelector {
          @Override
          protected void setPivot(int i) {
            if (skypedBytes < bytesPerDim) {
-             System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
+             System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
            }
-           pivotDoc = points.docIDs[i];
+           System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
          }

          @Override
          protected int compare(int i, int j) {
            if (skypedBytes < bytesPerDim) {
-             int iOffset = i * packedBytesLength;
-             int jOffset = j * packedBytesLength;
-             int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end,
-                 points.block, jOffset + start, jOffset + end);
+             int iOffset = i * packedBytesDocIDLength;
+             int jOffset = j * packedBytesDocIDLength;
+             int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
              if (cmp != 0) {
                return cmp;
              }
            }
-           return points.docIDs[i] - points.docIDs[j];
+           int iOffset = i * packedBytesDocIDLength + dataOffset;
+           int jOffset = j * packedBytesDocIDLength + dataOffset;
+           return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
          }

          @Override
          protected int comparePivot(int j) {
            if (skypedBytes < bytesPerDim) {
-             int jOffset = j * packedBytesLength;
-             int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim,
-                 points.block, jOffset + start, jOffset + end);
+             int jOffset = j * packedBytesDocIDLength;
+             int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
              if (cmp != 0) {
                return cmp;
              }
            }
-           return pivotDoc - points.docIDs[j];
+           int jOffset = j * packedBytesDocIDLength + dataOffset;
+           return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
          }
        };
      }
@@ -394,8 +408,9 @@ public final class BKDRadixSelector {

  /** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */
  public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) {
-   final int offset = dim * bytesPerDim + commonPrefixLength;
+   final int dimOffset = dim * bytesPerDim + commonPrefixLength;
    final int dimCmpBytes = bytesPerDim - commonPrefixLength;
+   final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
    new MSBRadixSorter(bytesSorted - commonPrefixLength) {

      @Override
@@ -403,11 +418,10 @@ public final class BKDRadixSelector {
        assert k >= 0 : "negative prefix " + k;
        if (k < dimCmpBytes) {
          // dim bytes
-         return points.block[i * packedBytesLength + offset + k] & 0xff;
+         return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
        } else {
-         // doc id
-         int s = 3 - (k - dimCmpBytes);
-         return (points.docIDs[i] >>> (s * 8)) & 0xff;
+         // data bytes
+         return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
        }
      }

@@ -418,13 +432,14 @@ public final class BKDRadixSelector {

      @Override
      protected Sorter getFallbackSorter(int k) {
-       int skypedBytes = k + commonPrefixLength;
-       final int start = dim * bytesPerDim + skypedBytes;
-       final int end = dim * bytesPerDim + bytesPerDim;
+       final int skypedBytes = k + commonPrefixLength;
+       final int dimStart = dim * bytesPerDim + skypedBytes;
+       final int dimEnd = dim * bytesPerDim + bytesPerDim;
+       final int dataOffset = numIndexDims * bytesPerDim;
+       // data length is composed of the data dimensions plus the docID
+       final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
        return new IntroSorter() {

          int pivotDoc = -1;

          @Override
          protected void swap(int i, int j) {
            points.swap(i, j);
@@ -433,36 +448,37 @@ public final class BKDRadixSelector {
          @Override
          protected void setPivot(int i) {
            if (skypedBytes < bytesPerDim) {
-             System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
+             System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
            }
-           pivotDoc = points.docIDs[i];
+           System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
          }

          @Override
          protected int compare(int i, int j) {
            if (skypedBytes < bytesPerDim) {
-             int iOffset = i * packedBytesLength;
-             int jOffset = j * packedBytesLength;
-             int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end,
-                 points.block, jOffset + start, jOffset + end);
+             int iOffset = i * packedBytesDocIDLength;
+             int jOffset = j * packedBytesDocIDLength;
+             int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
              if (cmp != 0) {
                return cmp;
              }
            }
-           return points.docIDs[i] - points.docIDs[j];
+           int iOffset = i * packedBytesDocIDLength + dataOffset;
+           int jOffset = j * packedBytesDocIDLength + dataOffset;
+           return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
          }

          @Override
          protected int comparePivot(int j) {
            if (skypedBytes < bytesPerDim) {
-             int jOffset = j * packedBytesLength;
-             int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim,
-                 points.block, jOffset + start, jOffset + end);
+             int jOffset = j * packedBytesDocIDLength;
+             int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
              if (cmp != 0) {
                return cmp;
              }
            }
-           return pivotDoc - points.docIDs[j];
+           int jOffset = j * packedBytesDocIDLength + dataOffset;
+           return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
          }
        };
      }

@@ -776,7 +776,7 @@ public class BKDWriter implements Closeable {
    assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;

    //We re-use the selector so we do not need to create an object every time.
-   BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
+   BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);

    boolean success = false;
    try {
@@ -787,7 +787,8 @@ public class BKDWriter implements Closeable {
          minPackedValue, maxPackedValue,
          parentSplits,
          splitPackedValues,
-         leafBlockFPs);
+         leafBlockFPs,
+         new int[maxPointsInLeafNode]);
      assert Arrays.equals(parentSplits, new int[numIndexDims]);

      // If no exception, we should have cleaned everything up:
@@ -1366,7 +1367,7 @@ public class BKDWriter implements Closeable {
      }

      // sort by sortedDim
-     MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
+     MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
          reader, from, to, scratchBytesRef1, scratchBytesRef2);

      BytesRef comparator = scratchBytesRef1;
@@ -1435,7 +1436,7 @@ public class BKDWriter implements Closeable {
        commonPrefixLen = bytesPerDim;
      }

-     MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
+     MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
          reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);

      // set the split value
@@ -1472,7 +1473,8 @@ public class BKDWriter implements Closeable {
                     byte[] minPackedValue, byte[] maxPackedValue,
                     int[] parentSplits,
                     byte[] splitPackedValues,
-                    long[] leafBlockFPs) throws IOException {
+                    long[] leafBlockFPs,
+                    int[] spareDocIds) throws IOException {

    if (nodeID >= leafNodeOffset) {

@@ -1532,7 +1534,13 @@ public class BKDWriter implements Closeable {
      // loading the values:
      int count = to - from;
      assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
-     writeLeafBlockDocs(out, heapSource.docIDs, from, count);
+     assert count <= spareDocIds.length : "count=" + count + " > length=" + spareDocIds.length;
+     // Write doc IDs
+     int[] docIDs = spareDocIds;
+     for (int i = 0; i < count; i++) {
+       docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
+     }
+     writeLeafBlockDocs(out, docIDs, 0, count);

      // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
      // from the index, much like how terms dict does so from the FST:
@@ -1555,7 +1563,7 @@ public class BKDWriter implements Closeable {
        }
      };
      assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
-         heapSource.docIDs, from);
+         docIDs, 0);
      writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);

    } else {
@@ -1602,12 +1610,12 @@ public class BKDWriter implements Closeable {
      // Recurse on left tree:
      build(2 * nodeID, leafNodeOffset, slices[0],
          out, radixSelector, minPackedValue, maxSplitPackedValue,
-         parentSplits, splitPackedValues, leafBlockFPs);
+         parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);

      // Recurse on right tree:
      build(2 * nodeID + 1, leafNodeOffset, slices[1],
          out, radixSelector, minSplitPackedValue, maxPackedValue
-         , parentSplits, splitPackedValues, leafBlockFPs);
+         , parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);

      parentSplits[splitDim]--;
    }
@@ -1661,6 +1669,13 @@ public class BKDWriter implements Closeable {
    if (cmp > 0) {
      throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
    }
+   if (cmp == 0 && numDataDims > numIndexDims) {
+     int dataOffset = numIndexDims * bytesPerDim;
+     cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
+     if (cmp > 0) {
+       throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
+     }
+   }
    if (cmp == 0 && doc < lastDoc) {
      throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
    }

@@ -27,16 +27,16 @@ public final class HeapPointReader implements PointReader {
  private int curRead;
  final byte[] block;
  final int packedBytesLength;
- final int[] docIDs;
+ final int packedBytesDocIDLength;
  final int end;
  private final HeapPointValue pointValue;

- public HeapPointReader(byte[] block, int packedBytesLength, int[] docIDs, int start, int end) {
+ public HeapPointReader(byte[] block, int packedBytesLength, int start, int end) {
    this.block = block;
-   this.docIDs = docIDs;
    curRead = start-1;
    this.end = end;
    this.packedBytesLength = packedBytesLength;
+   this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
    if (start < end) {
      this.pointValue = new HeapPointValue(block, packedBytesLength);
    } else {
@@ -53,7 +53,7 @@ public final class HeapPointReader implements PointReader {

  @Override
  public PointValue pointValue() {
-   pointValue.setValue(curRead * packedBytesLength, docIDs[curRead]);
+   pointValue.setOffset(curRead * packedBytesDocIDLength);
    return pointValue;
  }

@@ -66,21 +66,22 @@ public final class HeapPointReader implements PointReader {
   */
  static class HeapPointValue implements PointValue {

-   BytesRef packedValue;
-   BytesRef docIDBytes;
-   int docID;
+   final BytesRef packedValue;
+   final BytesRef packedValueDocID;
+   final int packedValueLength;

-   public HeapPointValue(byte[] value, int packedLength) {
-     packedValue = new BytesRef(value, 0, packedLength);
-     docIDBytes = new BytesRef(new byte[4]);
+   HeapPointValue(byte[] value, int packedValueLength) {
+     this.packedValueLength = packedValueLength;
+     this.packedValue = new BytesRef(value, 0, packedValueLength);
+     this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
    }

    /**
-    * Sets a new value by changing the offset and docID.
+    * Sets a new value by changing the offset.
     */
-   public void setValue(int offset, int docID) {
-     this.docID = docID;
+   public void setOffset(int offset) {
      packedValue.offset = offset;
+     packedValueDocID.offset = offset;
    }

    @Override
@@ -90,16 +91,14 @@ public final class HeapPointReader implements PointReader {

    @Override
    public int docID() {
-     return docID;
+     int position = packedValueDocID.offset + packedValueLength;
+     return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
+         | ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
    }

    @Override
-   public BytesRef docIDBytes() {
-     docIDBytes.bytes[0] = (byte) (docID >> 24);
-     docIDBytes.bytes[1] = (byte) (docID >> 16);
-     docIDBytes.bytes[2] = (byte) (docID >> 8);
-     docIDBytes.bytes[3] = (byte) (docID >> 0);
-     return docIDBytes;
+   public BytesRef packedValueDocIDBytes() {
+     return packedValueDocID;
    }
  }
 }

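packedValue and packedValueDocID wrap the same backing array with different lengths, so repositioning a point is a pair of offset assignments and docID() decodes the four trailing bytes on demand. A small sketch of that aliasing, under the entry layout assumed by this commit (class and values below are illustrative):

  import org.apache.lucene.util.BytesRef;

  class HeapPointValueLayoutDemo {
    // Sketch: two BytesRef views over one entry; packedValue covers the
    // dimensions only, packedValueDocID additionally covers the docID bytes.
    static void demo() {
      int packedValueLength = 16;               // e.g. 4 dims * 4 bytes each
      int entryLength = packedValueLength + Integer.BYTES;
      byte[] block = new byte[10 * entryLength];
      BytesRef packedValue = new BytesRef(block, 0, packedValueLength);
      BytesRef packedValueDocID = new BytesRef(block, 0, entryLength);

      int entry = 3;                            // reposition both views to entry 3:
      packedValue.offset = entry * entryLength; // no copying, no per-point allocation
      packedValueDocID.offset = entry * entryLength;
    }
  }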
@@ -25,10 +25,10 @@ import org.apache.lucene.util.FutureArrays;
  * @lucene.internal
  * */
 public final class HeapPointWriter implements PointWriter {
- public final int[] docIDs;
  public final byte[] block;
  final int size;
  final int packedBytesLength;
+ final int packedBytesDocIDLength;
  private final byte[] scratch;
  private int nextWrite;
  private boolean closed;
@@ -37,11 +37,11 @@ public final class HeapPointWriter implements PointWriter {


  public HeapPointWriter(int size, int packedBytesLength) {
-   this.docIDs = new int[size];
-   this.block = new byte[packedBytesLength * size];
-   this.size = size;
+   this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
    this.packedBytesLength = packedBytesLength;
-   this.scratch = new byte[packedBytesLength];
+   this.block = new byte[packedBytesDocIDLength * size];
+   this.size = size;
+   this.scratch = new byte[packedBytesDocIDLength];
    if (size > 0) {
      pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength);
    } else {
@@ -53,7 +53,7 @@ public final class HeapPointWriter implements PointWriter {
  /** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */
  public PointValue getPackedValueSlice(int index) {
    assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index;
-   pointValue.setValue(index * packedBytesLength, docIDs[index]);
+   pointValue.setOffset(index * packedBytesDocIDLength);
    return pointValue;
  }

@@ -62,8 +62,12 @@ public final class HeapPointWriter implements PointWriter {
    assert closed == false : "point writer is already closed";
    assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
    assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
-   System.arraycopy(packedValue, 0, block, nextWrite * packedBytesLength, packedBytesLength);
-   docIDs[nextWrite] = docID;
+   System.arraycopy(packedValue, 0, block, nextWrite * packedBytesDocIDLength, packedBytesLength);
+   int position = nextWrite * packedBytesDocIDLength + packedBytesLength;
+   block[position] = (byte) (docID >> 24);
+   block[++position] = (byte) (docID >> 16);
+   block[++position] = (byte) (docID >> 8);
+   block[++position] = (byte) (docID >> 0);
    nextWrite++;
  }

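The docID now lives inline, big-endian, in the four bytes after each packed value; big-endian order matters because unsigned lexicographic comparison of those bytes then matches numeric order for non-negative docIDs. A standalone round-trip sketch mirroring the shift pattern above (helper names are illustrative):

  // Sketch: big-endian docID packing/unpacking for the trailing Integer.BYTES
  // of each block entry.
  static void writeDocId(byte[] block, int position, int docID) {
    block[position]     = (byte) (docID >> 24);
    block[position + 1] = (byte) (docID >> 16);
    block[position + 2] = (byte) (docID >> 8);
    block[position + 3] = (byte) docID;
  }

  static int readDocId(byte[] block, int position) {
    return ((block[position] & 0xFF) << 24) | ((block[position + 1] & 0xFF) << 16)
        | ((block[position + 2] & 0xFF) << 8) | (block[position + 3] & 0xFF);
  }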
@@ -71,27 +75,23 @@ public final class HeapPointWriter implements PointWriter {
  public void append(PointValue pointValue) {
    assert closed == false : "point writer is already closed";
    assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
-   BytesRef packedValue = pointValue.packedValue();
-   assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + (packedBytesLength) + "] but was [" + packedValue.length + "]";
-   System.arraycopy(packedValue.bytes, packedValue.offset, block, nextWrite * packedBytesLength, packedBytesLength);
-   docIDs[nextWrite] = pointValue.docID();
+   BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
+   assert packedValueDocID.length == packedBytesDocIDLength : "[packedValue] must have length [" + (packedBytesDocIDLength) + "] but was [" + packedValueDocID.length + "]";
+   System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset, block, nextWrite * packedBytesDocIDLength, packedBytesDocIDLength);
    nextWrite++;
  }

  public void swap(int i, int j) {
-   int docID = docIDs[i];
-   docIDs[i] = docIDs[j];
-   docIDs[j] = docID;

-   int indexI = i * packedBytesLength;
-   int indexJ = j * packedBytesLength;
+   int indexI = i * packedBytesDocIDLength;
+   int indexJ = j * packedBytesDocIDLength;

    // scratch1 = values[i]
-   System.arraycopy(block, indexI, scratch, 0, packedBytesLength);
+   System.arraycopy(block, indexI, scratch, 0, packedBytesDocIDLength);
    // values[i] = values[j]
-   System.arraycopy(block, indexJ, block, indexI, packedBytesLength);
+   System.arraycopy(block, indexJ, block, indexI, packedBytesDocIDLength);
    // values[j] = scratch1
-   System.arraycopy(scratch, 0, block, indexJ, packedBytesLength);
+   System.arraycopy(scratch, 0, block, indexJ, packedBytesDocIDLength);
  }

  public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) {
@@ -101,8 +101,8 @@ public final class HeapPointWriter implements PointWriter {
      for (int dim = 0; dim < numDataDims; dim++) {
        final int start = dim * bytesPerDim + commonPrefixLengths[dim];
        final int end = dim * bytesPerDim + bytesPerDim;
-       if (FutureArrays.mismatch(block, i * packedBytesLength + start, i * packedBytesLength + end,
-           block, (i - 1) * packedBytesLength + start, (i - 1) * packedBytesLength + end) != -1) {
+       if (FutureArrays.mismatch(block, i * packedBytesDocIDLength + start, i * packedBytesDocIDLength + end,
+           block, (i - 1) * packedBytesDocIDLength + start, (i - 1) * packedBytesDocIDLength + end) != -1) {
          leafCardinality++;
          break;
        }
@@ -119,9 +119,9 @@ public final class HeapPointWriter implements PointWriter {
  @Override
  public PointReader getReader(long start, long length) {
    assert closed : "point writer is still open and trying to get a reader";
-   assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length;
+   assert start + length <= size: "start=" + start + " length=" + length + " docIDs.length=" + size;
    assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite;
-   return new HeapPointReader(block, packedBytesLength, docIDs, (int) start, Math.toIntExact(start+length));
+   return new HeapPointReader(block, packedBytesLength, (int) start, Math.toIntExact(start+length));
  }

  @Override
@@ -135,6 +135,6 @@ public final class HeapPointWriter implements PointWriter {

  @Override
  public String toString() {
-   return "HeapPointWriter(count=" + nextWrite + " size=" + docIDs.length + ")";
+   return "HeapPointWriter(count=" + nextWrite + " size=" + size + ")";
  }
 }

@@ -77,7 +77,8 @@ public final class MutablePointsReaderUtils {
      protected int comparePivot(int j) {
        if (k < packedBytesLength) {
          reader.getValue(j, scratch);
-         int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k, scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
+         int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k,
+             scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
          if (cmp != 0) {
            return cmp;
          }
@@ -91,14 +92,16 @@ public final class MutablePointsReaderUtils {
  }

  /** Sort points on the given dimension. */
- public static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
+ public static void sortByDim(int numDataDim, int numIndexDim, int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
                               MutablePointValues reader, int from, int to,
                               BytesRef scratch1, BytesRef scratch2) {
+   final int start = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
+   final int dimEnd = sortedDim * bytesPerDim + bytesPerDim;
+   final int dataStart = numIndexDim * bytesPerDim;
+   final int dataEnd = dataStart + (numDataDim - numIndexDim) * bytesPerDim;
    // No need for a fancy radix sort here, this is called on the leaves only so
    // there are not many values to sort
-   final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
-   final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
    new IntroSorter() {

      final BytesRef pivot = scratch1;
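The leaf sort now breaks ties in three stages: the sorted dimension's remaining bytes, then the data-only dimensions, then the docID. A compact sketch of that comparator over plain byte arrays (using java.util.Arrays.compareUnsigned, for which Lucene's FutureArrays is the Java 8 equivalent; helper names are illustrative):

  import java.util.Arrays;

  // Sketch: three-stage comparison used when sorting leaf values, assuming
  // values are packed as [indexDims | dataOnlyDims] with docIDs held aside.
  static int compareLeaf(byte[] a, int docA, byte[] b, int docB,
                         int start, int dimEnd, int dataStart, int dataEnd) {
    int cmp = Arrays.compareUnsigned(a, start, dimEnd, b, start, dimEnd);         // sorted dim
    if (cmp == 0) {
      cmp = Arrays.compareUnsigned(a, dataStart, dataEnd, b, dataStart, dataEnd); // data dims
      if (cmp == 0) {
        cmp = docA - docB;                                                        // docID
      }
    }
    return cmp;
  }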
@@ -118,9 +121,14 @@ public final class MutablePointsReaderUtils {
      @Override
      protected int comparePivot(int j) {
        reader.getValue(j, scratch2);
-       int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset, pivot.offset + offset + numBytesToCompare, scratch2.bytes, scratch2.offset + offset, scratch2.offset + offset + numBytesToCompare);
+       int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + start, pivot.offset + dimEnd, scratch2.bytes,
+           scratch2.offset + start, scratch2.offset + dimEnd);
        if (cmp == 0) {
-         cmp = pivotDoc - reader.getDocID(j);
+         cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
+             scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
+         if (cmp == 0) {
+           cmp = pivotDoc - reader.getDocID(j);
+         }
        }
        return cmp;
      }
@@ -130,16 +138,20 @@ public final class MutablePointsReaderUtils {
  /** Partition points around {@code mid}. All values on the left must be less
   * than or equal to it and all values on the right must be greater than or
   * equal to it. */
- public static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
+ public static void partition(int numDataDim, int numIndexDim, int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
                               MutablePointValues reader, int from, int to, int mid,
                               BytesRef scratch1, BytesRef scratch2) {
-   final int offset = splitDim * bytesPerDim + commonPrefixLen;
-   final int cmpBytes = bytesPerDim - commonPrefixLen;
+   final int dimOffset = splitDim * bytesPerDim + commonPrefixLen;
+   final int dimCmpBytes = bytesPerDim - commonPrefixLen;
+   final int dataOffset = numIndexDim * bytesPerDim;
+   final int dataCmpBytes = (numDataDim - numIndexDim) * bytesPerDim + dimCmpBytes;
    final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
-   new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {
+   new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) {

      @Override
      protected Selector getFallbackSelector(int k) {
+       final int dataStart = (k < dimCmpBytes) ? dataOffset : dataOffset + k - dimCmpBytes;
+       final int dataEnd = numDataDim * bytesPerDim;
        return new IntroSelector() {

          final BytesRef pivot = scratch1;
@@ -158,9 +170,18 @@ public final class MutablePointsReaderUtils {

          @Override
          protected int comparePivot(int j) {
-           if (k < cmpBytes) {
+           if (k < dimCmpBytes) {
              reader.getValue(j, scratch2);
-             int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset + k, pivot.offset + offset + k + cmpBytes - k, scratch2.bytes, scratch2.offset + offset + k, scratch2.offset + offset + k + cmpBytes - k);
+             int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dimOffset + k, pivot.offset + dimOffset + dimCmpBytes,
+                 scratch2.bytes, scratch2.offset + dimOffset + k, scratch2.offset + dimOffset + dimCmpBytes);
              if (cmp != 0) {
                return cmp;
              }
            }
+           if (k < dataCmpBytes) {
+             reader.getValue(j, scratch2);
+             int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
+                 scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
+             if (cmp != 0) {
+               return cmp;
+             }
+           }
@@ -177,10 +198,12 @@ public final class MutablePointsReaderUtils {

      @Override
      protected int byteAt(int i, int k) {
-       if (k < cmpBytes) {
-         return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
+       if (k < dimCmpBytes) {
+         return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k));
+       } else if (k < dataCmpBytes) {
+         return Byte.toUnsignedInt(reader.getByteAt(i, dataOffset + k - dimCmpBytes));
        } else {
-         final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
+         final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3);
          return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
        }
      }
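partition sizes its radix key as dataCmpBytes + (bitsPerDocId + 7) / 8: the remaining split-dimension bytes, plus all data-only dimension bytes, plus just enough bytes to cover the largest docID. A sketch of that sizing (the bitsRequired computation below assumes PackedInts.bitsRequired semantics; the helper is illustrative):

  // Sketch: radix key width used by partition(), mirroring the names in the diff.
  static int radixKeyBytes(int numDataDim, int numIndexDim, int bytesPerDim,
                           int commonPrefixLen, int maxDoc) {
    int dimCmpBytes = bytesPerDim - commonPrefixLen;                          // split-dim suffix
    int dataCmpBytes = (numDataDim - numIndexDim) * bytesPerDim + dimCmpBytes; // + data dims
    int bitsPerDocId = 32 - Integer.numberOfLeadingZeros(Math.max(1, maxDoc - 1));
    return dataCmpBytes + (bitsPerDocId + 7) / 8;                             // + docID bytes
  }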
@@ -137,12 +137,14 @@ public final class OfflinePointReader implements PointReader {
   */
  static class OfflinePointValue implements PointValue {

-   BytesRef packedValue;
-   BytesRef docIDBytes;
+   final BytesRef packedValue;
+   final BytesRef packedValueDocID;
+   final int packedValueLength;

    OfflinePointValue(byte[] value, int packedValueLength) {
-     packedValue = new BytesRef(value, 0, packedValueLength);
-     docIDBytes = new BytesRef(value, packedValueLength, Integer.BYTES);
+     this.packedValueLength = packedValueLength;
+     this.packedValue = new BytesRef(value, 0, packedValueLength);
+     this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
    }

    /**
@@ -150,7 +152,7 @@ public final class OfflinePointReader implements PointReader {
     */
    public void setOffset(int offset) {
      packedValue.offset = offset;
-     docIDBytes.offset = offset + packedValue.length;
+     packedValueDocID.offset = offset;
    }

    @Override
@@ -160,14 +162,14 @@ public final class OfflinePointReader implements PointReader {

    @Override
    public int docID() {
-     int position = docIDBytes.offset;
-     return ((docIDBytes.bytes[position] & 0xFF) << 24) | ((docIDBytes.bytes[++position] & 0xFF) << 16)
-         | ((docIDBytes.bytes[++position] & 0xFF) << 8) | (docIDBytes.bytes[++position] & 0xFF);
+     int position = packedValueDocID.offset + packedValueLength;
+     return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
+         | ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
    }

    @Override
-   public BytesRef docIDBytes() {
-     return docIDBytes;
+   public BytesRef packedValueDocIDBytes() {
+     return packedValueDocID;
    }
  }

@@ -62,12 +62,9 @@ public final class OfflinePointWriter implements PointWriter {
  @Override
  public void append(PointValue pointValue) throws IOException {
    assert closed == false : "Point writer is already closed";
-   BytesRef packedValue = pointValue.packedValue();
-   assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
-   out.writeBytes(packedValue.bytes, packedValue.offset, packedValue.length);
-   BytesRef docIDBytes = pointValue.docIDBytes();
-   assert docIDBytes.length == Integer.BYTES : "[docIDBytes] must have length [" + Integer.BYTES + "] but was [" + docIDBytes.length + "]";
-   out.writeBytes(docIDBytes.bytes, docIDBytes.offset, docIDBytes.length);
+   BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
+   assert packedValueDocID.length == packedBytesLength + Integer.BYTES : "[packedValue and docID] must have length [" + (packedBytesLength + Integer.BYTES) + "] but was [" + packedValueDocID.length + "]";
+   out.writeBytes(packedValueDocID.bytes, packedValueDocID.offset, packedValueDocID.length);
    count++;
    assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count;
  }
@@ -28,10 +28,11 @@ public interface PointValue {
  /** Returns the packed values for the dimensions */
  BytesRef packedValue();

- /** Returns the document id */
+ /** Returns the docID */
  int docID();

- /** Returns the byte representation of the document id */
- BytesRef docIDBytes();
+ /** Returns the byte representation of the packed value
+  *  together with the docID */
+ BytesRef packedValueDocIDBytes();

 }
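A PointValue now exposes one contiguous view over the value plus its docID, which is what lets writers move a whole entry with a single arraycopy or writeBytes call instead of two. A hedged usage sketch (the copyEntry helper is illustrative, not part of the commit):

  import org.apache.lucene.util.BytesRef;

  // Sketch: copying one point entry (packed value + trailing docID) in a single
  // bulk move, relying on the packedValueDocIDBytes() contract added here.
  static void copyEntry(PointValue pointValue, byte[] dest, int destOffset) {
    BytesRef entry = pointValue.packedValueDocIDBytes(); // length = packedBytesLength + Integer.BYTES
    System.arraycopy(entry.bytes, entry.offset, dest, destOffset, entry.length);
  }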
@@ -1098,6 +1098,80 @@ public class TestBKD extends LuceneTestCase {
    }
  }

+ public void testCheckDataDimOptimalOrder() throws IOException {
+   Directory dir = newDirectory();
+   final int numValues = atLeast(5000);
+   final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
+   final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
+   final double maxMB = (float) 3.0 + (3*random().nextDouble());
+
+   final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
+   final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
+
+   final byte[] pointValue1 = new byte[numDataDims * numBytesPerDim];
+   final byte[] pointValue2 = new byte[numDataDims * numBytesPerDim];
+   random().nextBytes(pointValue1);
+   random().nextBytes(pointValue2);
+   // equal index dimensions but different data dimensions
+   for (int i = 0; i < numIndexDims; i++) {
+     System.arraycopy(pointValue1, i * numBytesPerDim, pointValue2, i * numBytesPerDim, numBytesPerDim);
+   }
+
+   BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode,
+       maxMB, 2 * numValues);
+   for (int i = 0; i < numValues; ++i) {
+     w.add(pointValue1, i);
+     w.add(pointValue2, i);
+   }
+   final long indexFP;
+   try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
+     indexFP = w.finish(out);
+     w.close();
+   }
+
+   IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
+   pointsIn.seek(indexFP);
+   BKDReader points = new BKDReader(pointsIn);
+
+   points.intersect(new IntersectVisitor() {
+
+     byte[] previous = null;
+     boolean hasChanged = false;
+
+     @Override
+     public void visit(int docID) {
+       throw new UnsupportedOperationException();
+     }
+
+     @Override
+     public void visit(int docID, byte[] packedValue) {
+       if (previous == null) {
+         previous = new byte[numDataDims * numBytesPerDim];
+         System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
+       } else {
+         int mismatch = FutureArrays.mismatch(packedValue, 0, numDataDims * numBytesPerDim, previous, 0, numDataDims * numBytesPerDim);
+         if (mismatch != -1) {
+           if (hasChanged == false) {
+             hasChanged = true;
+             System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
+           } else {
+             fail("Points are not in optimal order");
+           }
+         }
+       }
+     }
+
+     @Override
+     public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+       return Relation.CELL_CROSSES_QUERY;
+     }
+   });
+
+   pointsIn.close();
+   dir.close();
+ }
+
  public void test2DLongOrdsOffline() throws Exception {
    try (Directory dir = newDirectory()) {
      int numDocs = 100000;
@@ -209,7 +209,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
  }

  private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
-   BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
+   BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
    //we only split by indexed dimension so we check for each only those dimension
    for (int splitDim = 0; splitDim < indexDimensions; splitDim++) {
      //We need to make a copy of the data as it is deleted in the process
@@ -225,9 +225,15 @@ public class TestBKDRadixSelector extends LuceneTestCase {
      int cmp = FutureArrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions);
      assertTrue(cmp <= 0);
      if (cmp == 0) {
-       int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint);
-       int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint);
-       assertTrue(minDocId >= maxDocID);
+       byte[] maxDataDim = getMaxDataDimension(slices[0], bytesPerDimensions, dataDimensions, indexDimensions, max, splitDim);
+       byte[] minDataDim = getMinDataDimension(slices[1], bytesPerDimensions, dataDimensions, indexDimensions, min, splitDim);
+       cmp = FutureArrays.compareUnsigned(maxDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions, minDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions);
+       assertTrue(cmp <= 0);
+       if (cmp == 0) {
+         int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions, maxDataDim);
+         int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions, minDataDim);
+         assertTrue(minDocId >= maxDocID);
+       }
      }
      assertTrue(Arrays.equals(partitionPoint, min));
      slices[0].writer.destroy();
@@ -293,14 +299,17 @@ public class TestBKDRadixSelector extends LuceneTestCase {
    return min;
  }

- private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException {
+ private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
    int docID = Integer.MAX_VALUE;
    try (PointReader reader = p.writer.getReader(p.start, p.count)) {
      while (reader.next()) {
        PointValue pointValue = reader.pointValue();
        BytesRef packedValue = pointValue.packedValue();
        int offset = dimension * bytesPerDimension;
-       if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) {
+       int dataOffset = indexDims * bytesPerDimension;
+       int dataLength = (dataDims - indexDims) * bytesPerDimension;
+       if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
+           && FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
          int newDocID = pointValue.docID();
          if (newDocID < docID) {
            docID = newDocID;
@@ -311,6 +320,26 @@ public class TestBKDRadixSelector extends LuceneTestCase {
    return docID;
  }

+ private byte[] getMinDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] minDim, int splitDim) throws IOException {
+   byte[] min = new byte[(dataDims - indexDims) * bytesPerDimension];
+   Arrays.fill(min, (byte) 0xff);
+   int offset = splitDim * bytesPerDimension;
+   try (PointReader reader = p.writer.getReader(p.start, p.count)) {
+     byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
+     while (reader.next()) {
+       PointValue pointValue = reader.pointValue();
+       BytesRef packedValue = pointValue.packedValue();
+       if (FutureArrays.mismatch(minDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
+         System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
+         if (FutureArrays.compareUnsigned(min, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) > 0) {
+           System.arraycopy(value, 0, min, 0, (dataDims - indexDims) * bytesPerDimension);
+         }
+       }
+     }
+   }
+   return min;
+ }
+
  private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException {
    byte[] max = new byte[bytesPerDimension];
    Arrays.fill(max, (byte) 0);
@@ -328,14 +357,37 @@ public class TestBKDRadixSelector extends LuceneTestCase {
    return max;
  }

- private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException {
+ private byte[] getMaxDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] maxDim, int splitDim) throws IOException {
+   byte[] max = new byte[(dataDims - indexDims) * bytesPerDimension];
+   Arrays.fill(max, (byte) 0);
+   int offset = splitDim * bytesPerDimension;
+   try (PointReader reader = p.writer.getReader(p.start, p.count)) {
+     byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
+     while (reader.next()) {
+       PointValue pointValue = reader.pointValue();
+       BytesRef packedValue = pointValue.packedValue();
+       if (FutureArrays.mismatch(maxDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
+         System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
+         if (FutureArrays.compareUnsigned(max, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) < 0) {
+           System.arraycopy(value, 0, max, 0, (dataDims - indexDims) * bytesPerDimension);
+         }
+       }
+     }
+   }
+   return max;
+ }
+
+ private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
    int docID = Integer.MIN_VALUE;
    try (PointReader reader = p.writer.getReader(p.start, p.count)) {
      while (reader.next()) {
        PointValue pointValue = reader.pointValue();
        BytesRef packedValue = pointValue.packedValue();
        int offset = dimension * bytesPerDimension;
-       if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) {
+       int dataOffset = indexDims * bytesPerDimension;
+       int dataLength = (dataDims - indexDims) * bytesPerDimension;
+       if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
+           && FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
          int newDocID = pointValue.docID();
          if (newDocID > docID) {
            docID = newDocID;
@@ -117,7 +117,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
  private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{
    int packedBytesLength = dataDimensions * bytesPerDim;
    Directory dir = newDirectory();
-   BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDim, 1000, dir, "test");
+   BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDim, 1000, dir, "test");
    // we check for each dimension
    for (int splitDim = 0; splitDim < dataDimensions; splitDim++) {
      radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim));
@ -130,6 +130,11 @@ public class TestBKDRadixSort extends LuceneTestCase {

BytesRef value = pointValue.packedValue();
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
assertTrue(cmp >= 0);
if (cmp == 0) {
  int dataOffset = indexDimensions * bytesPerDim;
  cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataOffset, value.offset + packedBytesLength, previous, dataOffset, packedBytesLength);
  assertTrue(cmp >= 0);
}
if (cmp == 0) {
  assertTrue(pointValue.docID() >= previousDocId);
}
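The assertions above spell out the full order heapRadixSort must produce within a partition: the split dimension first, then the remaining data dimensions, then docID. A self-contained restatement of that three-level ordering as a comparator; PackedPoint and leafOrder are hypothetical names for illustration, while FutureArrays is the real utility:

  import java.util.Comparator;
  import org.apache.lucene.util.FutureArrays;

  final class LeafOrderSketch {
    // Hypothetical holder mirroring what the test reads back: packed bytes plus a docID.
    static final class PackedPoint {
      final byte[] packed;
      final int docID;
      PackedPoint(byte[] packed, int docID) { this.packed = packed; this.docID = docID; }
    }

    // Split dimension first, then the trailing data dimensions, then docID.
    static Comparator<PackedPoint> leafOrder(int splitDim, int numIndexDims, int numDataDims, int bytesPerDim) {
      final int dimOffset = splitDim * bytesPerDim;
      final int dataOffset = numIndexDims * bytesPerDim;
      final int end = numDataDims * bytesPerDim;
      return (a, b) -> {
        int cmp = FutureArrays.compareUnsigned(a.packed, dimOffset, dimOffset + bytesPerDim,
            b.packed, dimOffset, dimOffset + bytesPerDim);
        if (cmp == 0) {
          cmp = FutureArrays.compareUnsigned(a.packed, dataOffset, end, b.packed, dataOffset, end);
        }
        return cmp != 0 ? cmp : Integer.compare(a.docID, b.docID);
      };
    }
  }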
@ -38,7 +38,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {

private void doTestSort() {
  final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
  final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
  Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
  Point[] points = createRandomPoints(1, 1, bytesPerDim, maxDoc, new int[1]);
  DummyPointsReader reader = new DummyPointsReader(points);
  MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
  Arrays.sort(points, new Comparator<Point>() {
@ -62,25 +62,15 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}

private void doTestSortByDim() {
  final int numDims = TestUtil.nextInt(random(), 1, 8);
  final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
  final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
  final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
  final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
  Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
  int[] commonPrefixLengths = new int[numDims];
  for (int i = 0; i < commonPrefixLengths.length; ++i) {
    commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
  }
  BytesRef firstValue = points[0].packedValue;
  for (int i = 1; i < points.length; ++i) {
    for (int dim = 0; dim < numDims; ++dim) {
      int offset = dim * bytesPerDim;
      BytesRef packedValue = points[i].packedValue;
      System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
    }
  }
  int[] commonPrefixLengths = new int[numDataDims];
  Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
  DummyPointsReader reader = new DummyPointsReader(points);
  final int sortedDim = random().nextInt(numDims);
  MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
  final int sortedDim = random().nextInt(numIndexDims);
  MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
      new BytesRef(), new BytesRef());
  for (int i = 1; i < points.length; ++i) {
    final int offset = sortedDim * bytesPerDim;
@ -88,7 +78,13 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {

BytesRef currentValue = reader.points[i].packedValue;
int cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim);
if (cmp == 0) {
  cmp = reader.points[i - 1].doc - reader.points[i].doc;
  int dataDimOffset = numIndexDims * bytesPerDim;
  int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
  cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + dataDimOffset, previousValue.offset + dataDimOffset + dataDimsLength,
      currentValue.bytes, currentValue.offset + dataDimOffset, currentValue.offset + dataDimOffset + dataDimsLength);
  if (cmp == 0) {
    cmp = reader.points[i - 1].doc - reader.points[i].doc;
  }
}
assertTrue(cmp <= 0);
}
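To make the unsigned tie-break concrete, a tiny worked example, assuming one index dimension, one extra data dimension and one byte per dimension (class name and byte values invented for illustration):

  import org.apache.lucene.util.FutureArrays;

  final class UnsignedTieBreakSketch {
    public static void main(String[] args) {
      byte[] a = {0x05, (byte) 0x80}; // index byte 0x05, data byte 0x80 (128 unsigned)
      byte[] b = {0x05, 0x10};        // same index byte, data byte 0x10 (16 unsigned)
      // The index dimension ties, so ordering falls to the data suffix. A signed
      // byte comparison would call (byte) 0x80 smaller; the unsigned one does not:
      int cmp = FutureArrays.compareUnsigned(a, 1, 2, b, 1, 2);
      System.out.println(cmp > 0); // true: a's data suffix is larger, so b sorts first
    }
  }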
@ -101,29 +97,31 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
}

private void doTestPartition() {
  final int numDims = TestUtil.nextInt(random(), 1, 8);
  final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
  final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
  final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
  int[] commonPrefixLengths = new int[numDataDims];
  final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
  Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
  int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
  final int splitDim = random().nextInt(numDims);
  BytesRef firstValue = points[0].packedValue;
  for (int i = 1; i < points.length; ++i) {
    BytesRef packedValue = points[i].packedValue;
    int offset = splitDim * bytesPerDim;
    System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
  }
  Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
  final int splitDim = random().nextInt(numIndexDims);
  DummyPointsReader reader = new DummyPointsReader(points);
  final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
  MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
  MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLengths[splitDim], reader, 0, points.length, pivot,
      new BytesRef(), new BytesRef());
  BytesRef pivotValue = reader.points[pivot].packedValue;
  int offset = splitDim * bytesPerDim;
  for (int i = 0; i < points.length; ++i) {
    BytesRef value = reader.points[i].packedValue;
    int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim, pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
    int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim,
        pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
    if (cmp == 0) {
      cmp = reader.points[i].doc - reader.points[pivot].doc;
      int dataDimOffset = numIndexDims * bytesPerDim;
      int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
      cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataDimOffset, value.offset + dataDimOffset + dataDimsLength,
          pivotValue.bytes, pivotValue.offset + dataDimOffset, pivotValue.offset + dataDimOffset + dataDimsLength);
      if (cmp == 0) {
        cmp = reader.points[i].doc - reader.points[pivot].doc;
      }
    }
    if (i < pivot) {
      assertTrue(cmp <= 0);
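The loop above checks the defining invariant of partition: every point left of the pivot compares at or below it, and every point right of it at or above, under the same dimension-then-data-then-docID order. A compact restatement, reusing the hypothetical ordering comparator sketched earlier:

  import java.util.Comparator;

  final class PartitionInvariantSketch {
    // Asserts the post-condition of a pivot partition under a total order.
    static <T> void assertPartitioned(T[] pts, int pivot, Comparator<T> order) {
      for (int i = 0; i < pts.length; i++) {
        int cmp = order.compare(pts[i], pts[pivot]);
        if (i < pivot) assert cmp <= 0;      // left half: at or below the pivot
        else if (i > pivot) assert cmp >= 0; // right half: at or above the pivot
      }
    }
  }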
@ -135,14 +133,54 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
  }
}

private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
  final int packedBytesLength = numDims * bytesPerDim;
private static Point[] createRandomPoints(int numDataDims, int numIndexdims, int bytesPerDim, int maxDoc, int[] commonPrefixLengths) {
  assertTrue(commonPrefixLengths.length == numDataDims);
  final int packedBytesLength = numDataDims * bytesPerDim;
  final int numPoints = TestUtil.nextInt(random(), 1, 100000);
  Point[] points = new Point[numPoints];
  for (int i = 0; i < numPoints; ++i) {
    byte[] value = new byte[packedBytesLength];
    random().nextBytes(value);
    points[i] = new Point(value, random().nextInt(maxDoc));
  if (random().nextInt(5) != 0) {
    for (int i = 0; i < numPoints; ++i) {
      byte[] value = new byte[packedBytesLength];
      random().nextBytes(value);
      points[i] = new Point(value, random().nextInt(maxDoc));
    }
    for (int i = 0; i < numDataDims; ++i) {
      commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
    }
    BytesRef firstValue = points[0].packedValue;
    for (int i = 1; i < points.length; ++i) {
      for (int dim = 0; dim < numDataDims; ++dim) {
        int offset = dim * bytesPerDim;
        BytesRef packedValue = points[i].packedValue;
        System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
      }
    }
  } else {
    // index dims are equal, data dims different
    byte[] indexDims = new byte[numIndexdims * bytesPerDim];
    random().nextBytes(indexDims);
    byte[] dataDims = new byte[(numDataDims - numIndexdims) * bytesPerDim];
    for (int i = 0; i < numPoints; ++i) {
      byte[] value = new byte[packedBytesLength];
      System.arraycopy(indexDims, 0, value, 0, numIndexdims * bytesPerDim);
      random().nextBytes(dataDims);
      System.arraycopy(dataDims, 0, value, numIndexdims * bytesPerDim, (numDataDims - numIndexdims) * bytesPerDim);
      points[i] = new Point(value, random().nextInt(maxDoc));
    }
    for (int i = 0; i < numIndexdims; ++i) {
      commonPrefixLengths[i] = bytesPerDim;
    }
    for (int i = numIndexdims; i < numDataDims; ++i) {
      commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
    }
    BytesRef firstValue = points[0].packedValue;
    for (int i = 1; i < points.length; ++i) {
      for (int dim = numIndexdims; dim < numDataDims; ++dim) {
        int offset = dim * bytesPerDim;
        BytesRef packedValue = points[i].packedValue;
        System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
      }
    }
  }
  return points;
}
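Callers size the commonPrefixLengths array to numDataDims and let the helper fill it in. A minimal call site, mirroring doTestSortByDim and doTestPartition above:

  int numIndexDims = TestUtil.nextInt(random(), 1, 8);
  int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
  int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
  int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
  int[] commonPrefixLengths = new int[numDataDims]; // filled by the helper
  Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);

The one-in-five else branch above pins every point to the same index-dimension bytes, so the data-dimension and docID tie-break paths in the sort and partition tests are actually exercised.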