mirror of https://github.com/apache/lucene.git
LUCENE-8888: Improve distribution of points with data dimensions in BKD tree leaves (#747)
This commit is contained in:
parent
792871c480
commit
ef64f7af3e
|
@ -138,6 +138,9 @@ Optimizations
|
||||||
* LUCENE-8901: Load frequencies lazily only when needed in BlockDocsEnum and
|
* LUCENE-8901: Load frequencies lazily only when needed in BlockDocsEnum and
|
||||||
BlockImpactsEverythingEnum (Mayya Sharipova).
|
BlockImpactsEverythingEnum (Mayya Sharipova).
|
||||||
|
|
||||||
|
* LUCENE-8888: Optimize distribution of points with data dimensions in
|
||||||
|
BKD tree leaves. (Ignacio Vera)
|
||||||
|
|
||||||
Test Framework
|
Test Framework
|
||||||
|
|
||||||
* LUCENE-8825: CheckHits now display the shard index in case of mismatch
|
* LUCENE-8825: CheckHits now display the shard index in case of mismatch
|
||||||
|
|
|
@ -597,7 +597,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
|
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
|
||||||
|
|
||||||
//We re-use the selector so we do not need to create an object every time.
|
//We re-use the selector so we do not need to create an object every time.
|
||||||
BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
|
BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
|
@ -605,7 +605,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
|
|
||||||
build(1, numLeaves, points, out,
|
build(1, numLeaves, points, out,
|
||||||
radixSelector, minPackedValue, maxPackedValue,
|
radixSelector, minPackedValue, maxPackedValue,
|
||||||
splitPackedValues, leafBlockFPs);
|
splitPackedValues, leafBlockFPs, new int[maxPointsInLeafNode]);
|
||||||
|
|
||||||
|
|
||||||
// If no exception, we should have cleaned everything up:
|
// If no exception, we should have cleaned everything up:
|
||||||
|
@ -877,7 +877,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort by sortedDim
|
// sort by sortedDim
|
||||||
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
|
MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
|
||||||
reader, from, to, scratchBytesRef1, scratchBytesRef2);
|
reader, from, to, scratchBytesRef1, scratchBytesRef2);
|
||||||
|
|
||||||
// Save the block file pointer:
|
// Save the block file pointer:
|
||||||
|
@ -920,7 +920,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
|
MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
|
||||||
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
|
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
|
||||||
|
|
||||||
// set the split value
|
// set the split value
|
||||||
|
@ -951,7 +951,8 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
BKDRadixSelector radixSelector,
|
BKDRadixSelector radixSelector,
|
||||||
byte[] minPackedValue, byte[] maxPackedValue,
|
byte[] minPackedValue, byte[] maxPackedValue,
|
||||||
byte[] splitPackedValues,
|
byte[] splitPackedValues,
|
||||||
long[] leafBlockFPs) throws IOException {
|
long[] leafBlockFPs,
|
||||||
|
int[] spareDocIds) throws IOException {
|
||||||
|
|
||||||
if (nodeID >= leafNodeOffset) {
|
if (nodeID >= leafNodeOffset) {
|
||||||
|
|
||||||
|
@ -1010,7 +1011,12 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
// loading the values:
|
// loading the values:
|
||||||
int count = to - from;
|
int count = to - from;
|
||||||
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
|
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
|
||||||
writeLeafBlockDocs(out, heapSource.docIDs, from, count);
|
// Write doc IDs
|
||||||
|
int[] docIDs = spareDocIds;
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
|
||||||
|
}
|
||||||
|
writeLeafBlockDocs(out, spareDocIds, 0, count);
|
||||||
|
|
||||||
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
|
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
|
||||||
// from the index, much like how terms dict does so from the FST:
|
// from the index, much like how terms dict does so from the FST:
|
||||||
|
@ -1030,7 +1036,7 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
||||||
heapSource.docIDs, from);
|
docIDs, 0);
|
||||||
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
|
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -1075,12 +1081,12 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
|
|
||||||
// Recurse on left tree:
|
// Recurse on left tree:
|
||||||
build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector,
|
build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector,
|
||||||
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs);
|
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||||
|
|
||||||
// TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
|
// TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
|
||||||
// Recurse on right tree:
|
// Recurse on right tree:
|
||||||
build(2*nodeID+1, leafNodeOffset, pathSlices[1], out, radixSelector,
|
build(2*nodeID+1, leafNodeOffset, pathSlices[1], out, radixSelector,
|
||||||
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs);
|
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1132,6 +1138,13 @@ final class SimpleTextBKDWriter implements Closeable {
|
||||||
if (cmp > 0) {
|
if (cmp > 0) {
|
||||||
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim);
|
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim);
|
||||||
}
|
}
|
||||||
|
if (cmp == 0 && numDataDims > numIndexDims) {
|
||||||
|
int dataOffset = numIndexDims * bytesPerDim;
|
||||||
|
cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
|
||||||
|
if (cmp > 0) {
|
||||||
|
throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (cmp == 0 && doc < lastDoc) {
|
if (cmp == 0 && doc < lastDoc) {
|
||||||
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim);
|
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim);
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,6 +48,8 @@ public final class BKDRadixSelector {
|
||||||
private final int bytesSorted;
|
private final int bytesSorted;
|
||||||
//data dimensions size
|
//data dimensions size
|
||||||
private final int packedBytesLength;
|
private final int packedBytesLength;
|
||||||
|
// data dimensions plus docID size
|
||||||
|
private final int packedBytesDocIDLength;
|
||||||
//flag to when we are moving to sort on heap
|
//flag to when we are moving to sort on heap
|
||||||
private final int maxPointsSortInHeap;
|
private final int maxPointsSortInHeap;
|
||||||
//reusable buffer
|
//reusable buffer
|
||||||
|
@ -60,18 +62,26 @@ public final class BKDRadixSelector {
|
||||||
private final Directory tempDir;
|
private final Directory tempDir;
|
||||||
// prefix for temp files
|
// prefix for temp files
|
||||||
private final String tempFileNamePrefix;
|
private final String tempFileNamePrefix;
|
||||||
|
// data and index dimensions
|
||||||
|
private final int numDataDims, numIndexDims;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sole constructor.
|
* Sole constructor.
|
||||||
*/
|
*/
|
||||||
public BKDRadixSelector(int numDim, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
|
public BKDRadixSelector(int numDataDims, int numIndexDims, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
|
||||||
this.bytesPerDim = bytesPerDim;
|
this.bytesPerDim = bytesPerDim;
|
||||||
this.packedBytesLength = numDim * bytesPerDim;
|
this.numDataDims = numDataDims;
|
||||||
this.bytesSorted = bytesPerDim + Integer.BYTES;
|
this.numIndexDims = numIndexDims;
|
||||||
|
this.packedBytesLength = numDataDims * bytesPerDim;
|
||||||
|
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
|
||||||
|
// Selection and sorting are done in a given dimension. In case the values of that dimension are equal
|
||||||
|
// between two points, we tie-break first using the data-only dimensions and, if those are still equal,
|
||||||
|
// we tie-break on the docID. Here we account for all bytes used in the process.
|
||||||
|
this.bytesSorted = bytesPerDim + (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
|
||||||
this.maxPointsSortInHeap = maxPointsSortInHeap;
|
this.maxPointsSortInHeap = maxPointsSortInHeap;
|
||||||
int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / (packedBytesLength + Integer.BYTES);
|
int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / packedBytesDocIDLength;
|
||||||
this.offlineBuffer = new byte[numberOfPointsOffline * (packedBytesLength + Integer.BYTES)];
|
this.offlineBuffer = new byte[numberOfPointsOffline * packedBytesDocIDLength];
|
||||||
this.partitionBucket = new int[bytesSorted];
|
this.partitionBucket = new int[bytesSorted];
|
||||||
this.histogram = new long[HISTOGRAM_SIZE];
|
this.histogram = new long[HISTOGRAM_SIZE];
|
||||||
this.scratch = new byte[bytesSorted];
|
this.scratch = new byte[bytesSorted];
|
||||||
|
@ -134,12 +144,12 @@ public final class BKDRadixSelector {
|
||||||
assert commonPrefixPosition > dimCommonPrefix;
|
assert commonPrefixPosition > dimCommonPrefix;
|
||||||
reader.next();
|
reader.next();
|
||||||
PointValue pointValue = reader.pointValue();
|
PointValue pointValue = reader.pointValue();
|
||||||
|
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||||
// copy dimension
|
// copy dimension
|
||||||
BytesRef packedValue = pointValue.packedValue();
|
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, bytesPerDim);
|
||||||
System.arraycopy(packedValue.bytes, packedValue.offset + offset, scratch, 0, bytesPerDim);
|
// copy data dimensions and docID
|
||||||
// copy docID
|
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + numIndexDims * bytesPerDim, scratch, bytesPerDim, (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES);
|
||||||
BytesRef docIDBytes = pointValue.docIDBytes();
|
|
||||||
System.arraycopy(docIDBytes.bytes, docIDBytes.offset, scratch, bytesPerDim, Integer.BYTES);
|
|
||||||
for (long i = from + 1; i < to; i++) {
|
for (long i = from + 1; i < to; i++) {
|
||||||
reader.next();
|
reader.next();
|
||||||
pointValue = reader.pointValue();
|
pointValue = reader.pointValue();
|
||||||
|
@ -157,13 +167,15 @@ public final class BKDRadixSelector {
|
||||||
//check common prefix and adjust histogram
|
//check common prefix and adjust histogram
|
||||||
final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix;
|
final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix;
|
||||||
final int endIndex = (commonPrefixPosition > bytesPerDim) ? bytesPerDim : commonPrefixPosition;
|
final int endIndex = (commonPrefixPosition > bytesPerDim) ? bytesPerDim : commonPrefixPosition;
|
||||||
packedValue = pointValue.packedValue();
|
packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||||
int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValue.bytes, packedValue.offset + offset + startIndex, packedValue.offset + offset + endIndex);
|
int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValueDocID.bytes, packedValueDocID.offset + offset + startIndex, packedValueDocID.offset + offset + endIndex);
|
||||||
if (j == -1) {
|
if (j == -1) {
|
||||||
if (commonPrefixPosition > bytesPerDim) {
|
if (commonPrefixPosition > bytesPerDim) {
|
||||||
//tie-break on docID
|
//tie-break on data dimensions + docID
|
||||||
docIDBytes = pointValue.docIDBytes();
|
final int startTieBreak = numIndexDims * bytesPerDim;
|
||||||
int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition, docIDBytes.bytes, docIDBytes.offset, docIDBytes.offset + commonPrefixPosition - bytesPerDim);
|
final int endTieBreak = startTieBreak + commonPrefixPosition - bytesPerDim;
|
||||||
|
int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition,
|
||||||
|
packedValueDocID.bytes, packedValueDocID.offset + startTieBreak, packedValueDocID.offset + endTieBreak);
|
||||||
if (k != -1) {
|
if (k != -1) {
|
||||||
commonPrefixPosition = bytesPerDim + k;
|
commonPrefixPosition = bytesPerDim + k;
|
||||||
Arrays.fill(histogram, 0);
|
Arrays.fill(histogram, 0);
|
||||||
|
@ -195,8 +207,8 @@ public final class BKDRadixSelector {
|
||||||
BytesRef packedValue = pointValue.packedValue();
|
BytesRef packedValue = pointValue.packedValue();
|
||||||
bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff;
|
bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff;
|
||||||
} else {
|
} else {
|
||||||
BytesRef docIDValue = pointValue.docIDBytes();
|
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||||
bucket = docIDValue.bytes[docIDValue.offset + commonPrefixPosition - bytesPerDim] & 0xff;
|
bucket = packedValueDocID.bytes[packedValueDocID.offset + numIndexDims * bytesPerDim + commonPrefixPosition - bytesPerDim] & 0xff;
|
||||||
}
|
}
|
||||||
return bucket;
|
return bucket;
|
||||||
}
|
}
|
||||||
|
@ -310,10 +322,11 @@ public final class BKDRadixSelector {
|
||||||
return partition;
|
return partition;
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefix) {
|
private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefixLength) {
|
||||||
final int offset = dim * bytesPerDim + commonPrefix;
|
final int dimOffset = dim * bytesPerDim + commonPrefixLength;
|
||||||
final int dimCmpBytes = bytesPerDim - commonPrefix;
|
final int dimCmpBytes = bytesPerDim - commonPrefixLength;
|
||||||
new RadixSelector(bytesSorted - commonPrefix) {
|
final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
|
||||||
|
new RadixSelector(bytesSorted - commonPrefixLength) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void swap(int i, int j) {
|
protected void swap(int i, int j) {
|
||||||
|
@ -325,23 +338,23 @@ public final class BKDRadixSelector {
|
||||||
assert k >= 0 : "negative prefix " + k;
|
assert k >= 0 : "negative prefix " + k;
|
||||||
if (k < dimCmpBytes) {
|
if (k < dimCmpBytes) {
|
||||||
// dim bytes
|
// dim bytes
|
||||||
return points.block[i * packedBytesLength + offset + k] & 0xff;
|
return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
|
||||||
} else {
|
} else {
|
||||||
// doc id
|
// data-only dimension bytes followed by docID bytes
|
||||||
int s = 3 - (k - dimCmpBytes);
|
return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
|
||||||
return (points.docIDs[i] >>> (s * 8)) & 0xff;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Selector getFallbackSelector(int d) {
|
protected Selector getFallbackSelector(int d) {
|
||||||
int skypedBytes = d + commonPrefix;
|
final int skypedBytes = d + commonPrefixLength;
|
||||||
final int start = dim * bytesPerDim + skypedBytes;
|
final int dimStart = dim * bytesPerDim + skypedBytes;
|
||||||
final int end = dim * bytesPerDim + bytesPerDim;
|
final int dimEnd = dim * bytesPerDim + bytesPerDim;
|
||||||
|
final int dataOffset = numIndexDims * bytesPerDim;
|
||||||
|
// data length comprises the data-only dimensions plus the docID
|
||||||
|
final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
|
||||||
return new IntroSelector() {
|
return new IntroSelector() {
|
||||||
|
|
||||||
int pivotDoc = -1;
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void swap(int i, int j) {
|
protected void swap(int i, int j) {
|
||||||
points.swap(i, j);
|
points.swap(i, j);
|
||||||
|
@ -350,36 +363,37 @@ public final class BKDRadixSelector {
|
||||||
@Override
|
@Override
|
||||||
protected void setPivot(int i) {
|
protected void setPivot(int i) {
|
||||||
if (skypedBytes < bytesPerDim) {
|
if (skypedBytes < bytesPerDim) {
|
||||||
System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
|
System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
|
||||||
}
|
}
|
||||||
pivotDoc = points.docIDs[i];
|
System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int compare(int i, int j) {
|
protected int compare(int i, int j) {
|
||||||
if (skypedBytes < bytesPerDim) {
|
if (skypedBytes < bytesPerDim) {
|
||||||
int iOffset = i * packedBytesLength;
|
int iOffset = i * packedBytesDocIDLength;
|
||||||
int jOffset = j * packedBytesLength;
|
int jOffset = j * packedBytesDocIDLength;
|
||||||
int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end,
|
int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
|
||||||
points.block, jOffset + start, jOffset + end);
|
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return points.docIDs[i] - points.docIDs[j];
|
int iOffset = i * packedBytesDocIDLength + dataOffset;
|
||||||
|
int jOffset = j * packedBytesDocIDLength + dataOffset;
|
||||||
|
return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
if (skypedBytes < bytesPerDim) {
|
if (skypedBytes < bytesPerDim) {
|
||||||
int jOffset = j * packedBytesLength;
|
int jOffset = j * packedBytesDocIDLength;
|
||||||
int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim,
|
int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
|
||||||
points.block, jOffset + start, jOffset + end);
|
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return pivotDoc - points.docIDs[j];
|
int jOffset = j * packedBytesDocIDLength + dataOffset;
|
||||||
|
return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -394,8 +408,9 @@ public final class BKDRadixSelector {
|
||||||
|
|
||||||
/** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */
|
/** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */
|
||||||
public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) {
|
public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) {
|
||||||
final int offset = dim * bytesPerDim + commonPrefixLength;
|
final int dimOffset = dim * bytesPerDim + commonPrefixLength;
|
||||||
final int dimCmpBytes = bytesPerDim - commonPrefixLength;
|
final int dimCmpBytes = bytesPerDim - commonPrefixLength;
|
||||||
|
final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
|
||||||
new MSBRadixSorter(bytesSorted - commonPrefixLength) {
|
new MSBRadixSorter(bytesSorted - commonPrefixLength) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -403,11 +418,10 @@ public final class BKDRadixSelector {
|
||||||
assert k >= 0 : "negative prefix " + k;
|
assert k >= 0 : "negative prefix " + k;
|
||||||
if (k < dimCmpBytes) {
|
if (k < dimCmpBytes) {
|
||||||
// dim bytes
|
// dim bytes
|
||||||
return points.block[i * packedBytesLength + offset + k] & 0xff;
|
return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
|
||||||
} else {
|
} else {
|
||||||
// doc id
|
// data-only dimension bytes followed by docID bytes
|
||||||
int s = 3 - (k - dimCmpBytes);
|
return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
|
||||||
return (points.docIDs[i] >>> (s * 8)) & 0xff;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,13 +432,14 @@ public final class BKDRadixSelector {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Sorter getFallbackSorter(int k) {
|
protected Sorter getFallbackSorter(int k) {
|
||||||
int skypedBytes = k + commonPrefixLength;
|
final int skypedBytes = k + commonPrefixLength;
|
||||||
final int start = dim * bytesPerDim + skypedBytes;
|
final int dimStart = dim * bytesPerDim + skypedBytes;
|
||||||
final int end = dim * bytesPerDim + bytesPerDim;
|
final int dimEnd = dim * bytesPerDim + bytesPerDim;
|
||||||
|
final int dataOffset = numIndexDims * bytesPerDim;
|
||||||
|
// data length comprises the data-only dimensions plus the docID
|
||||||
|
final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
|
||||||
return new IntroSorter() {
|
return new IntroSorter() {
|
||||||
|
|
||||||
int pivotDoc = -1;
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void swap(int i, int j) {
|
protected void swap(int i, int j) {
|
||||||
points.swap(i, j);
|
points.swap(i, j);
|
||||||
|
@ -433,36 +448,37 @@ public final class BKDRadixSelector {
|
||||||
@Override
|
@Override
|
||||||
protected void setPivot(int i) {
|
protected void setPivot(int i) {
|
||||||
if (skypedBytes < bytesPerDim) {
|
if (skypedBytes < bytesPerDim) {
|
||||||
System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
|
System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
|
||||||
}
|
}
|
||||||
pivotDoc = points.docIDs[i];
|
System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int compare(int i, int j) {
|
protected int compare(int i, int j) {
|
||||||
if (skypedBytes < bytesPerDim) {
|
if (skypedBytes < bytesPerDim) {
|
||||||
int iOffset = i * packedBytesLength;
|
int iOffset = i * packedBytesDocIDLength;
|
||||||
int jOffset = j * packedBytesLength;
|
int jOffset = j * packedBytesDocIDLength;
|
||||||
int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end,
|
int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
|
||||||
points.block, jOffset + start, jOffset + end);
|
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return points.docIDs[i] - points.docIDs[j];
|
int iOffset = i * packedBytesDocIDLength + dataOffset;
|
||||||
|
int jOffset = j * packedBytesDocIDLength + dataOffset;
|
||||||
|
return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
if (skypedBytes < bytesPerDim) {
|
if (skypedBytes < bytesPerDim) {
|
||||||
int jOffset = j * packedBytesLength;
|
int jOffset = j * packedBytesDocIDLength;
|
||||||
int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim,
|
int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
|
||||||
points.block, jOffset + start, jOffset + end);
|
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return pivotDoc - points.docIDs[j];
|
int jOffset = j * packedBytesDocIDLength + dataOffset;
|
||||||
|
return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -776,7 +776,7 @@ public class BKDWriter implements Closeable {
|
||||||
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
|
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
|
||||||
|
|
||||||
//We re-use the selector so we do not need to create an object every time.
|
//We re-use the selector so we do not need to create an object every time.
|
||||||
BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
|
BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
|
||||||
|
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
|
@ -787,7 +787,8 @@ public class BKDWriter implements Closeable {
|
||||||
minPackedValue, maxPackedValue,
|
minPackedValue, maxPackedValue,
|
||||||
parentSplits,
|
parentSplits,
|
||||||
splitPackedValues,
|
splitPackedValues,
|
||||||
leafBlockFPs);
|
leafBlockFPs,
|
||||||
|
new int[maxPointsInLeafNode]);
|
||||||
assert Arrays.equals(parentSplits, new int[numIndexDims]);
|
assert Arrays.equals(parentSplits, new int[numIndexDims]);
|
||||||
|
|
||||||
// If no exception, we should have cleaned everything up:
|
// If no exception, we should have cleaned everything up:
|
||||||
|
@ -1366,7 +1367,7 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// sort by sortedDim
|
// sort by sortedDim
|
||||||
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
|
MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
|
||||||
reader, from, to, scratchBytesRef1, scratchBytesRef2);
|
reader, from, to, scratchBytesRef1, scratchBytesRef2);
|
||||||
|
|
||||||
BytesRef comparator = scratchBytesRef1;
|
BytesRef comparator = scratchBytesRef1;
|
||||||
|
@ -1435,7 +1436,7 @@ public class BKDWriter implements Closeable {
|
||||||
commonPrefixLen = bytesPerDim;
|
commonPrefixLen = bytesPerDim;
|
||||||
}
|
}
|
||||||
|
|
||||||
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
|
MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
|
||||||
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
|
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
|
||||||
|
|
||||||
// set the split value
|
// set the split value
|
||||||
|
@ -1472,7 +1473,8 @@ public class BKDWriter implements Closeable {
|
||||||
byte[] minPackedValue, byte[] maxPackedValue,
|
byte[] minPackedValue, byte[] maxPackedValue,
|
||||||
int[] parentSplits,
|
int[] parentSplits,
|
||||||
byte[] splitPackedValues,
|
byte[] splitPackedValues,
|
||||||
long[] leafBlockFPs) throws IOException {
|
long[] leafBlockFPs,
|
||||||
|
int[] spareDocIds) throws IOException {
|
||||||
|
|
||||||
if (nodeID >= leafNodeOffset) {
|
if (nodeID >= leafNodeOffset) {
|
||||||
|
|
||||||
|
@ -1532,7 +1534,13 @@ public class BKDWriter implements Closeable {
|
||||||
// loading the values:
|
// loading the values:
|
||||||
int count = to - from;
|
int count = to - from;
|
||||||
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
|
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
|
||||||
writeLeafBlockDocs(out, heapSource.docIDs, from, count);
|
assert count <= spareDocIds.length : "count=" + count + " > length=" + spareDocIds.length;
|
||||||
|
// Write doc IDs
|
||||||
|
int[] docIDs = spareDocIds;
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
|
||||||
|
}
|
||||||
|
writeLeafBlockDocs(out, docIDs, 0, count);
|
||||||
|
|
||||||
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
|
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
|
||||||
// from the index, much like how terms dict does so from the FST:
|
// from the index, much like how terms dict does so from the FST:
|
||||||
|
@ -1555,7 +1563,7 @@ public class BKDWriter implements Closeable {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
|
||||||
heapSource.docIDs, from);
|
docIDs, 0);
|
||||||
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
|
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -1602,12 +1610,12 @@ public class BKDWriter implements Closeable {
|
||||||
// Recurse on left tree:
|
// Recurse on left tree:
|
||||||
build(2 * nodeID, leafNodeOffset, slices[0],
|
build(2 * nodeID, leafNodeOffset, slices[0],
|
||||||
out, radixSelector, minPackedValue, maxSplitPackedValue,
|
out, radixSelector, minPackedValue, maxSplitPackedValue,
|
||||||
parentSplits, splitPackedValues, leafBlockFPs);
|
parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||||
|
|
||||||
// Recurse on right tree:
|
// Recurse on right tree:
|
||||||
build(2 * nodeID + 1, leafNodeOffset, slices[1],
|
build(2 * nodeID + 1, leafNodeOffset, slices[1],
|
||||||
out, radixSelector, minSplitPackedValue, maxPackedValue
|
out, radixSelector, minSplitPackedValue, maxPackedValue
|
||||||
, parentSplits, splitPackedValues, leafBlockFPs);
|
, parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
|
||||||
|
|
||||||
parentSplits[splitDim]--;
|
parentSplits[splitDim]--;
|
||||||
}
|
}
|
||||||
|
@ -1661,6 +1669,13 @@ public class BKDWriter implements Closeable {
|
||||||
if (cmp > 0) {
|
if (cmp > 0) {
|
||||||
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
|
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
|
||||||
}
|
}
|
||||||
|
if (cmp == 0 && numDataDims > numIndexDims) {
|
||||||
|
int dataOffset = numIndexDims * bytesPerDim;
|
||||||
|
cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
|
||||||
|
if (cmp > 0) {
|
||||||
|
throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
|
||||||
|
}
|
||||||
|
}
|
||||||
if (cmp == 0 && doc < lastDoc) {
|
if (cmp == 0 && doc < lastDoc) {
|
||||||
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
|
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,16 +27,16 @@ public final class HeapPointReader implements PointReader {
|
||||||
private int curRead;
|
private int curRead;
|
||||||
final byte[] block;
|
final byte[] block;
|
||||||
final int packedBytesLength;
|
final int packedBytesLength;
|
||||||
final int[] docIDs;
|
final int packedBytesDocIDLength;
|
||||||
final int end;
|
final int end;
|
||||||
private final HeapPointValue pointValue;
|
private final HeapPointValue pointValue;
|
||||||
|
|
||||||
public HeapPointReader(byte[] block, int packedBytesLength, int[] docIDs, int start, int end) {
|
public HeapPointReader(byte[] block, int packedBytesLength, int start, int end) {
|
||||||
this.block = block;
|
this.block = block;
|
||||||
this.docIDs = docIDs;
|
|
||||||
curRead = start-1;
|
curRead = start-1;
|
||||||
this.end = end;
|
this.end = end;
|
||||||
this.packedBytesLength = packedBytesLength;
|
this.packedBytesLength = packedBytesLength;
|
||||||
|
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
|
||||||
if (start < end) {
|
if (start < end) {
|
||||||
this.pointValue = new HeapPointValue(block, packedBytesLength);
|
this.pointValue = new HeapPointValue(block, packedBytesLength);
|
||||||
} else {
|
} else {
|
||||||
|
@ -53,7 +53,7 @@ public final class HeapPointReader implements PointReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public PointValue pointValue() {
|
public PointValue pointValue() {
|
||||||
pointValue.setValue(curRead * packedBytesLength, docIDs[curRead]);
|
pointValue.setOffset(curRead * packedBytesDocIDLength);
|
||||||
return pointValue;
|
return pointValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -66,21 +66,22 @@ public final class HeapPointReader implements PointReader {
|
||||||
*/
|
*/
|
||||||
static class HeapPointValue implements PointValue {
|
static class HeapPointValue implements PointValue {
|
||||||
|
|
||||||
BytesRef packedValue;
|
final BytesRef packedValue;
|
||||||
BytesRef docIDBytes;
|
final BytesRef packedValueDocID;
|
||||||
int docID;
|
final int packedValueLength;
|
||||||
|
|
||||||
public HeapPointValue(byte[] value, int packedLength) {
|
HeapPointValue(byte[] value, int packedValueLength) {
|
||||||
packedValue = new BytesRef(value, 0, packedLength);
|
this.packedValueLength = packedValueLength;
|
||||||
docIDBytes = new BytesRef(new byte[4]);
|
this.packedValue = new BytesRef(value, 0, packedValueLength);
|
||||||
|
this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets a new value by changing the offset and docID.
|
* Sets a new value by changing the offset.
|
||||||
*/
|
*/
|
||||||
public void setValue(int offset, int docID) {
|
public void setOffset(int offset) {
|
||||||
this.docID = docID;
|
|
||||||
packedValue.offset = offset;
|
packedValue.offset = offset;
|
||||||
|
packedValueDocID.offset = offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -90,16 +91,14 @@ public final class HeapPointReader implements PointReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int docID() {
|
public int docID() {
|
||||||
return docID;
|
int position = packedValueDocID.offset + packedValueLength;
|
||||||
|
return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
|
||||||
|
| ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef docIDBytes() {
|
public BytesRef packedValueDocIDBytes() {
|
||||||
docIDBytes.bytes[0] = (byte) (docID >> 24);
|
return packedValueDocID;
|
||||||
docIDBytes.bytes[1] = (byte) (docID >> 16);
|
|
||||||
docIDBytes.bytes[2] = (byte) (docID >> 8);
|
|
||||||
docIDBytes.bytes[3] = (byte) (docID >> 0);
|
|
||||||
return docIDBytes;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,10 +25,10 @@ import org.apache.lucene.util.FutureArrays;
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
* */
|
* */
|
||||||
public final class HeapPointWriter implements PointWriter {
|
public final class HeapPointWriter implements PointWriter {
|
||||||
public final int[] docIDs;
|
|
||||||
public final byte[] block;
|
public final byte[] block;
|
||||||
final int size;
|
final int size;
|
||||||
final int packedBytesLength;
|
final int packedBytesLength;
|
||||||
|
final int packedBytesDocIDLength;
|
||||||
private final byte[] scratch;
|
private final byte[] scratch;
|
||||||
private int nextWrite;
|
private int nextWrite;
|
||||||
private boolean closed;
|
private boolean closed;
|
||||||
|
@ -37,11 +37,11 @@ public final class HeapPointWriter implements PointWriter {
|
||||||
|
|
||||||
|
|
||||||
public HeapPointWriter(int size, int packedBytesLength) {
|
public HeapPointWriter(int size, int packedBytesLength) {
|
||||||
this.docIDs = new int[size];
|
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
|
||||||
this.block = new byte[packedBytesLength * size];
|
|
||||||
this.size = size;
|
|
||||||
this.packedBytesLength = packedBytesLength;
|
this.packedBytesLength = packedBytesLength;
|
||||||
this.scratch = new byte[packedBytesLength];
|
this.block = new byte[packedBytesDocIDLength * size];
|
||||||
|
this.size = size;
|
||||||
|
this.scratch = new byte[packedBytesDocIDLength];
|
||||||
if (size > 0) {
|
if (size > 0) {
|
||||||
pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength);
|
pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength);
|
||||||
} else {
|
} else {
|
||||||
|
@ -53,7 +53,7 @@ public final class HeapPointWriter implements PointWriter {
|
||||||
/** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */
|
/** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */
|
||||||
public PointValue getPackedValueSlice(int index) {
|
public PointValue getPackedValueSlice(int index) {
|
||||||
assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index;
|
assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index;
|
||||||
pointValue.setValue(index * packedBytesLength, docIDs[index]);
|
pointValue.setOffset(index * packedBytesDocIDLength);
|
||||||
return pointValue;
|
return pointValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,8 +62,12 @@ public final class HeapPointWriter implements PointWriter {
|
||||||
assert closed == false : "point writer is already closed";
|
assert closed == false : "point writer is already closed";
|
||||||
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
|
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
|
||||||
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
|
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
|
||||||
System.arraycopy(packedValue, 0, block, nextWrite * packedBytesLength, packedBytesLength);
|
System.arraycopy(packedValue, 0, block, nextWrite * packedBytesDocIDLength, packedBytesLength);
|
||||||
docIDs[nextWrite] = docID;
|
int position = nextWrite * packedBytesDocIDLength + packedBytesLength;
|
||||||
|
block[position] = (byte) (docID >> 24);
|
||||||
|
block[++position] = (byte) (docID >> 16);
|
||||||
|
block[++position] = (byte) (docID >> 8);
|
||||||
|
block[++position] = (byte) (docID >> 0);
|
||||||
nextWrite++;
|
nextWrite++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,27 +75,23 @@ public final class HeapPointWriter implements PointWriter {
|
||||||
public void append(PointValue pointValue) {
|
public void append(PointValue pointValue) {
|
||||||
assert closed == false : "point writer is already closed";
|
assert closed == false : "point writer is already closed";
|
||||||
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
|
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
|
||||||
BytesRef packedValue = pointValue.packedValue();
|
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||||
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + (packedBytesLength) + "] but was [" + packedValue.length + "]";
|
assert packedValueDocID.length == packedBytesDocIDLength : "[packedValue] must have length [" + (packedBytesDocIDLength) + "] but was [" + packedValueDocID.length + "]";
|
||||||
System.arraycopy(packedValue.bytes, packedValue.offset, block, nextWrite * packedBytesLength, packedBytesLength);
|
System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset, block, nextWrite * packedBytesDocIDLength, packedBytesDocIDLength);
|
||||||
docIDs[nextWrite] = pointValue.docID();
|
|
||||||
nextWrite++;
|
nextWrite++;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void swap(int i, int j) {
|
public void swap(int i, int j) {
|
||||||
int docID = docIDs[i];
|
|
||||||
docIDs[i] = docIDs[j];
|
|
||||||
docIDs[j] = docID;
|
|
||||||
|
|
||||||
int indexI = i * packedBytesLength;
|
int indexI = i * packedBytesDocIDLength;
|
||||||
int indexJ = j * packedBytesLength;
|
int indexJ = j * packedBytesDocIDLength;
|
||||||
|
|
||||||
// scratch1 = values[i]
|
// scratch1 = values[i]
|
||||||
System.arraycopy(block, indexI, scratch, 0, packedBytesLength);
|
System.arraycopy(block, indexI, scratch, 0, packedBytesDocIDLength);
|
||||||
// values[i] = values[j]
|
// values[i] = values[j]
|
||||||
System.arraycopy(block, indexJ, block, indexI, packedBytesLength);
|
System.arraycopy(block, indexJ, block, indexI, packedBytesDocIDLength);
|
||||||
// values[j] = scratch1
|
// values[j] = scratch1
|
||||||
System.arraycopy(scratch, 0, block, indexJ, packedBytesLength);
|
System.arraycopy(scratch, 0, block, indexJ, packedBytesDocIDLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) {
|
public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) {
|
||||||
|
@ -101,8 +101,8 @@ public final class HeapPointWriter implements PointWriter {
|
||||||
for (int dim = 0; dim < numDataDims; dim++) {
|
for (int dim = 0; dim < numDataDims; dim++) {
|
||||||
final int start = dim * bytesPerDim + commonPrefixLengths[dim];
|
final int start = dim * bytesPerDim + commonPrefixLengths[dim];
|
||||||
final int end = dim * bytesPerDim + bytesPerDim;
|
final int end = dim * bytesPerDim + bytesPerDim;
|
||||||
if (FutureArrays.mismatch(block, i * packedBytesLength + start, i * packedBytesLength + end,
|
if (FutureArrays.mismatch(block, i * packedBytesDocIDLength + start, i * packedBytesDocIDLength + end,
|
||||||
block, (i - 1) * packedBytesLength + start, (i - 1) * packedBytesLength + end) != -1) {
|
block, (i - 1) * packedBytesDocIDLength + start, (i - 1) * packedBytesDocIDLength + end) != -1) {
|
||||||
leafCardinality++;
|
leafCardinality++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -119,9 +119,9 @@ public final class HeapPointWriter implements PointWriter {
|
||||||
@Override
|
@Override
|
||||||
public PointReader getReader(long start, long length) {
|
public PointReader getReader(long start, long length) {
|
||||||
assert closed : "point writer is still open and trying to get a reader";
|
assert closed : "point writer is still open and trying to get a reader";
|
||||||
assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length;
|
assert start + length <= size: "start=" + start + " length=" + length + " docIDs.length=" + size;
|
||||||
assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite;
|
assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite;
|
||||||
return new HeapPointReader(block, packedBytesLength, docIDs, (int) start, Math.toIntExact(start+length));
|
return new HeapPointReader(block, packedBytesLength, (int) start, Math.toIntExact(start+length));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -135,6 +135,6 @@ public final class HeapPointWriter implements PointWriter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "HeapPointWriter(count=" + nextWrite + " size=" + docIDs.length + ")";
|
return "HeapPointWriter(count=" + nextWrite + " size=" + size + ")";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,7 +77,8 @@ public final class MutablePointsReaderUtils {
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
if (k < packedBytesLength) {
|
if (k < packedBytesLength) {
|
||||||
reader.getValue(j, scratch);
|
reader.getValue(j, scratch);
|
||||||
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k, scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
|
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k,
|
||||||
|
scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
@ -91,14 +92,16 @@ public final class MutablePointsReaderUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Sort points on the given dimension. */
|
/** Sort points on the given dimension. */
|
||||||
public static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
|
public static void sortByDim(int numDataDim, int numIndexDim, int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
|
||||||
MutablePointValues reader, int from, int to,
|
MutablePointValues reader, int from, int to,
|
||||||
BytesRef scratch1, BytesRef scratch2) {
|
BytesRef scratch1, BytesRef scratch2) {
|
||||||
|
|
||||||
|
final int start = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
|
||||||
|
final int dimEnd = sortedDim * bytesPerDim + bytesPerDim;
|
||||||
|
final int dataStart = numIndexDim * bytesPerDim;
|
||||||
|
final int dataEnd = dataStart + (numDataDim - numIndexDim) * bytesPerDim;
|
||||||
// No need for a fancy radix sort here, this is called on the leaves only so
|
// No need for a fancy radix sort here, this is called on the leaves only so
|
||||||
// there are not many values to sort
|
// there are not many values to sort
|
||||||
final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
|
|
||||||
final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
|
|
||||||
new IntroSorter() {
|
new IntroSorter() {
|
||||||
|
|
||||||
final BytesRef pivot = scratch1;
|
final BytesRef pivot = scratch1;
|
||||||
|
@ -118,9 +121,14 @@ public final class MutablePointsReaderUtils {
|
||||||
@Override
|
@Override
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
reader.getValue(j, scratch2);
|
reader.getValue(j, scratch2);
|
||||||
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset, pivot.offset + offset + numBytesToCompare, scratch2.bytes, scratch2.offset + offset, scratch2.offset + offset + numBytesToCompare);
|
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + start, pivot.offset + dimEnd, scratch2.bytes,
|
||||||
|
scratch2.offset + start, scratch2.offset + dimEnd);
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
cmp = pivotDoc - reader.getDocID(j);
|
cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
|
||||||
|
scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
|
||||||
|
if (cmp == 0) {
|
||||||
|
cmp = pivotDoc - reader.getDocID(j);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
@ -130,16 +138,20 @@ public final class MutablePointsReaderUtils {
|
||||||
/** Partition points around {@code mid}. All values on the left must be less
|
/** Partition points around {@code mid}. All values on the left must be less
|
||||||
* than or equal to it and all values on the right must be greater than or
|
* than or equal to it and all values on the right must be greater than or
|
||||||
* equal to it. */
|
* equal to it. */
|
||||||
public static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
|
public static void partition(int numDataDim, int numIndexDim, int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
|
||||||
MutablePointValues reader, int from, int to, int mid,
|
MutablePointValues reader, int from, int to, int mid,
|
||||||
BytesRef scratch1, BytesRef scratch2) {
|
BytesRef scratch1, BytesRef scratch2) {
|
||||||
final int offset = splitDim * bytesPerDim + commonPrefixLen;
|
final int dimOffset = splitDim * bytesPerDim + commonPrefixLen;
|
||||||
final int cmpBytes = bytesPerDim - commonPrefixLen;
|
final int dimCmpBytes = bytesPerDim - commonPrefixLen;
|
||||||
|
final int dataOffset = numIndexDim * bytesPerDim;
|
||||||
|
final int dataCmpBytes = (numDataDim - numIndexDim) * bytesPerDim + dimCmpBytes;
|
||||||
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
|
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
|
||||||
new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) {
|
new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Selector getFallbackSelector(int k) {
|
protected Selector getFallbackSelector(int k) {
|
||||||
|
final int dataStart = (k < dimCmpBytes) ? dataOffset : dataOffset + k - dimCmpBytes;
|
||||||
|
final int dataEnd = numDataDim * bytesPerDim;
|
||||||
return new IntroSelector() {
|
return new IntroSelector() {
|
||||||
|
|
||||||
final BytesRef pivot = scratch1;
|
final BytesRef pivot = scratch1;
|
||||||
|
@ -158,9 +170,18 @@ public final class MutablePointsReaderUtils {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int comparePivot(int j) {
|
protected int comparePivot(int j) {
|
||||||
if (k < cmpBytes) {
|
if (k < dimCmpBytes) {
|
||||||
reader.getValue(j, scratch2);
|
reader.getValue(j, scratch2);
|
||||||
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset + k, pivot.offset + offset + k + cmpBytes - k, scratch2.bytes, scratch2.offset + offset + k, scratch2.offset + offset + k + cmpBytes - k);
|
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dimOffset + k, pivot.offset + dimOffset + dimCmpBytes,
|
||||||
|
scratch2.bytes, scratch2.offset + dimOffset + k, scratch2.offset + dimOffset + dimCmpBytes);
|
||||||
|
if (cmp != 0) {
|
||||||
|
return cmp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (k < dataCmpBytes) {
|
||||||
|
reader.getValue(j, scratch2);
|
||||||
|
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
|
||||||
|
scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
|
||||||
if (cmp != 0) {
|
if (cmp != 0) {
|
||||||
return cmp;
|
return cmp;
|
||||||
}
|
}
|
||||||
|
@ -177,10 +198,12 @@ public final class MutablePointsReaderUtils {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected int byteAt(int i, int k) {
|
protected int byteAt(int i, int k) {
|
||||||
if (k < cmpBytes) {
|
if (k < dimCmpBytes) {
|
||||||
return Byte.toUnsignedInt(reader.getByteAt(i, offset + k));
|
return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k));
|
||||||
|
} else if (k < dataCmpBytes) {
|
||||||
|
return Byte.toUnsignedInt(reader.getByteAt(i, dataOffset + k - dimCmpBytes));
|
||||||
} else {
|
} else {
|
||||||
final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3);
|
final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3);
|
||||||
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -137,12 +137,14 @@ public final class OfflinePointReader implements PointReader {
|
||||||
*/
|
*/
|
||||||
static class OfflinePointValue implements PointValue {
|
static class OfflinePointValue implements PointValue {
|
||||||
|
|
||||||
BytesRef packedValue;
|
final BytesRef packedValue;
|
||||||
BytesRef docIDBytes;
|
final BytesRef packedValueDocID;
|
||||||
|
final int packedValueLength;
|
||||||
|
|
||||||
OfflinePointValue(byte[] value, int packedValueLength) {
|
OfflinePointValue(byte[] value, int packedValueLength) {
|
||||||
packedValue = new BytesRef(value, 0, packedValueLength);
|
this.packedValueLength = packedValueLength;
|
||||||
docIDBytes = new BytesRef(value, packedValueLength, Integer.BYTES);
|
this.packedValue = new BytesRef(value, 0, packedValueLength);
|
||||||
|
this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -150,7 +152,7 @@ public final class OfflinePointReader implements PointReader {
|
||||||
*/
|
*/
|
||||||
public void setOffset(int offset) {
|
public void setOffset(int offset) {
|
||||||
packedValue.offset = offset;
|
packedValue.offset = offset;
|
||||||
docIDBytes.offset = offset + packedValue.length;
|
packedValueDocID.offset = offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -160,14 +162,14 @@ public final class OfflinePointReader implements PointReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int docID() {
|
public int docID() {
|
||||||
int position =docIDBytes.offset;
|
int position = packedValueDocID.offset + packedValueLength;
|
||||||
return ((docIDBytes.bytes[position] & 0xFF) << 24) | ((docIDBytes.bytes[++position] & 0xFF) << 16)
|
return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
|
||||||
| ((docIDBytes.bytes[++position] & 0xFF) << 8) | (docIDBytes.bytes[++position] & 0xFF);
|
| ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public BytesRef docIDBytes() {
|
public BytesRef packedValueDocIDBytes() {
|
||||||
return docIDBytes;
|
return packedValueDocID;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -62,12 +62,9 @@ public final class OfflinePointWriter implements PointWriter {
|
||||||
@Override
|
@Override
|
||||||
public void append(PointValue pointValue) throws IOException {
|
public void append(PointValue pointValue) throws IOException {
|
||||||
assert closed == false : "Point writer is already closed";
|
assert closed == false : "Point writer is already closed";
|
||||||
BytesRef packedValue = pointValue.packedValue();
|
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
|
||||||
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
|
assert packedValueDocID.length == packedBytesLength + Integer.BYTES : "[packedValue and docID] must have length [" + (packedBytesLength + Integer.BYTES) + "] but was [" + packedValueDocID.length + "]";
|
||||||
out.writeBytes(packedValue.bytes, packedValue.offset, packedValue.length);
|
out.writeBytes(packedValueDocID.bytes, packedValueDocID.offset, packedValueDocID.length);
|
||||||
BytesRef docIDBytes = pointValue.docIDBytes();
|
|
||||||
assert docIDBytes.length == Integer.BYTES : "[docIDBytes] must have length [" + Integer.BYTES + "] but was [" + docIDBytes.length + "]";
|
|
||||||
out.writeBytes(docIDBytes.bytes, docIDBytes.offset, docIDBytes.length);
|
|
||||||
count++;
|
count++;
|
||||||
assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count;
|
assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count;
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,10 +28,11 @@ public interface PointValue {
|
||||||
/** Returns the packed values for the dimensions */
|
/** Returns the packed values for the dimensions */
|
||||||
BytesRef packedValue();
|
BytesRef packedValue();
|
||||||
|
|
||||||
/** Returns the document id */
|
/** Returns the docID */
|
||||||
int docID();
|
int docID();
|
||||||
|
|
||||||
/** Returns the byte representation of the document id */
|
/** Returns the byte representation of the packed value
|
||||||
BytesRef docIDBytes();
|
* together with the docID */
|
||||||
|
BytesRef packedValueDocIDBytes();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1098,6 +1098,80 @@ public class TestBKD extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testCheckDataDimOptimalOrder() throws IOException {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
final int numValues = atLeast(5000);
|
||||||
|
final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
|
||||||
|
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
|
||||||
|
final double maxMB = (float) 3.0 + (3*random().nextDouble());
|
||||||
|
|
||||||
|
final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
|
||||||
|
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
|
||||||
|
|
||||||
|
final byte[] pointValue1 = new byte[numDataDims * numBytesPerDim];
|
||||||
|
final byte[] pointValue2 = new byte[numDataDims * numBytesPerDim];
|
||||||
|
random().nextBytes(pointValue1);
|
||||||
|
random().nextBytes(pointValue2);
|
||||||
|
// equal index dimensions but different data dimensions
|
||||||
|
for (int i = 0; i < numIndexDims; i++) {
|
||||||
|
System.arraycopy(pointValue1, i * numBytesPerDim, pointValue2, i * numBytesPerDim, numBytesPerDim);
|
||||||
|
}
|
||||||
|
|
||||||
|
BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode,
|
||||||
|
maxMB, 2 * numValues);
|
||||||
|
for (int i = 0; i < numValues; ++i) {
|
||||||
|
w.add(pointValue1, i);
|
||||||
|
w.add(pointValue2, i);
|
||||||
|
}
|
||||||
|
final long indexFP;
|
||||||
|
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
|
||||||
|
indexFP = w.finish(out);
|
||||||
|
w.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
|
||||||
|
pointsIn.seek(indexFP);
|
||||||
|
BKDReader points = new BKDReader(pointsIn);
|
||||||
|
|
||||||
|
points.intersect(new IntersectVisitor() {
|
||||||
|
|
||||||
|
byte[] previous = null;
|
||||||
|
boolean hasChanged = false;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void visit(int docID) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void visit(int docID, byte[] packedValue) {
|
||||||
|
if (previous == null) {
|
||||||
|
previous = new byte[numDataDims * numBytesPerDim];
|
||||||
|
System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
|
||||||
|
} else {
|
||||||
|
int mismatch = FutureArrays.mismatch(packedValue, 0, numDataDims * numBytesPerDim, previous, 0, numDataDims * numBytesPerDim);
|
||||||
|
if (mismatch != -1) {
|
||||||
|
if (hasChanged == false) {
|
||||||
|
hasChanged = true;
|
||||||
|
System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
|
||||||
|
} else {
|
||||||
|
fail("Points are not in optimal order");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
|
||||||
|
return Relation.CELL_CROSSES_QUERY;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
pointsIn.close();
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
public void test2DLongOrdsOffline() throws Exception {
|
public void test2DLongOrdsOffline() throws Exception {
|
||||||
try (Directory dir = newDirectory()) {
|
try (Directory dir = newDirectory()) {
|
||||||
int numDocs = 100000;
|
int numDocs = 100000;
|
||||||
|
|
|
@ -209,7 +209,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
|
private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
|
||||||
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
|
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
|
||||||
//we only split by indexed dimension so we check for each only those dimension
|
//we only split by indexed dimension so we check for each only those dimension
|
||||||
for (int splitDim = 0; splitDim < indexDimensions; splitDim++) {
|
for (int splitDim = 0; splitDim < indexDimensions; splitDim++) {
|
||||||
//We need to make a copy of the data as it is deleted in the process
|
//We need to make a copy of the data as it is deleted in the process
|
||||||
|
@ -225,9 +225,15 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
||||||
int cmp = FutureArrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions);
|
int cmp = FutureArrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions);
|
||||||
assertTrue(cmp <= 0);
|
assertTrue(cmp <= 0);
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint);
|
byte[] maxDataDim = getMaxDataDimension(slices[0], bytesPerDimensions, dataDimensions, indexDimensions, max, splitDim);
|
||||||
int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint);
|
byte[] minDataDim = getMinDataDimension(slices[1], bytesPerDimensions, dataDimensions, indexDimensions, min, splitDim);
|
||||||
assertTrue(minDocId >= maxDocID);
|
cmp = FutureArrays.compareUnsigned(maxDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions, minDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions);
|
||||||
|
assertTrue(cmp <= 0);
|
||||||
|
if (cmp == 0) {
|
||||||
|
int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,maxDataDim);
|
||||||
|
int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,minDataDim);
|
||||||
|
assertTrue(minDocId >= maxDocID);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
assertTrue(Arrays.equals(partitionPoint, min));
|
assertTrue(Arrays.equals(partitionPoint, min));
|
||||||
slices[0].writer.destroy();
|
slices[0].writer.destroy();
|
||||||
|
@ -293,14 +299,17 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
||||||
return min;
|
return min;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException {
|
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
|
||||||
int docID = Integer.MAX_VALUE;
|
int docID = Integer.MAX_VALUE;
|
||||||
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
||||||
while (reader.next()) {
|
while (reader.next()) {
|
||||||
PointValue pointValue = reader.pointValue();
|
PointValue pointValue = reader.pointValue();
|
||||||
BytesRef packedValue = pointValue.packedValue();
|
BytesRef packedValue = pointValue.packedValue();
|
||||||
int offset = dimension * bytesPerDimension;
|
int offset = dimension * bytesPerDimension;
|
||||||
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) {
|
int dataOffset = indexDims * bytesPerDimension;
|
||||||
|
int dataLength = (dataDims - indexDims) * bytesPerDimension;
|
||||||
|
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
|
||||||
|
&& FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
|
||||||
int newDocID = pointValue.docID();
|
int newDocID = pointValue.docID();
|
||||||
if (newDocID < docID) {
|
if (newDocID < docID) {
|
||||||
docID = newDocID;
|
docID = newDocID;
|
||||||
|
@ -311,6 +320,26 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
||||||
return docID;
|
return docID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private byte[] getMinDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] minDim, int splitDim) throws IOException {
|
||||||
|
byte[] min = new byte[(dataDims - indexDims) * bytesPerDimension];
|
||||||
|
Arrays.fill(min, (byte) 0xff);
|
||||||
|
int offset = splitDim * bytesPerDimension;
|
||||||
|
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
||||||
|
byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
|
||||||
|
while (reader.next()) {
|
||||||
|
PointValue pointValue = reader.pointValue();
|
||||||
|
BytesRef packedValue = pointValue.packedValue();
|
||||||
|
if (FutureArrays.mismatch(minDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
|
||||||
|
System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
|
||||||
|
if (FutureArrays.compareUnsigned(min, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) > 0) {
|
||||||
|
System.arraycopy(value, 0, min, 0, (dataDims - indexDims) * bytesPerDimension);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return min;
|
||||||
|
}
|
||||||
|
|
||||||
private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException {
|
private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException {
|
||||||
byte[] max = new byte[bytesPerDimension];
|
byte[] max = new byte[bytesPerDimension];
|
||||||
Arrays.fill(max, (byte) 0);
|
Arrays.fill(max, (byte) 0);
|
||||||
|
@ -328,14 +357,37 @@ public class TestBKDRadixSelector extends LuceneTestCase {
|
||||||
return max;
|
return max;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException {
|
private byte[] getMaxDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] maxDim, int splitDim) throws IOException {
|
||||||
|
byte[] max = new byte[(dataDims - indexDims) * bytesPerDimension];
|
||||||
|
Arrays.fill(max, (byte) 0);
|
||||||
|
int offset = splitDim * bytesPerDimension;
|
||||||
|
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
||||||
|
byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
|
||||||
|
while (reader.next()) {
|
||||||
|
PointValue pointValue = reader.pointValue();
|
||||||
|
BytesRef packedValue = pointValue.packedValue();
|
||||||
|
if (FutureArrays.mismatch(maxDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
|
||||||
|
System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
|
||||||
|
if (FutureArrays.compareUnsigned(max, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) < 0) {
|
||||||
|
System.arraycopy(value, 0, max, 0, (dataDims - indexDims) * bytesPerDimension);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
|
||||||
int docID = Integer.MIN_VALUE;
|
int docID = Integer.MIN_VALUE;
|
||||||
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
|
||||||
while (reader.next()) {
|
while (reader.next()) {
|
||||||
PointValue pointValue = reader.pointValue();
|
PointValue pointValue = reader.pointValue();
|
||||||
BytesRef packedValue = pointValue.packedValue();
|
BytesRef packedValue = pointValue.packedValue();
|
||||||
int offset = dimension * bytesPerDimension;
|
int offset = dimension * bytesPerDimension;
|
||||||
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) {
|
int dataOffset = indexDims * bytesPerDimension;
|
||||||
|
int dataLength = (dataDims - indexDims) * bytesPerDimension;
|
||||||
|
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
|
||||||
|
&& FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
|
||||||
int newDocID = pointValue.docID();
|
int newDocID = pointValue.docID();
|
||||||
if (newDocID > docID) {
|
if (newDocID > docID) {
|
||||||
docID = newDocID;
|
docID = newDocID;
|
||||||
|
|
|
@ -117,7 +117,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
||||||
private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{
|
private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{
|
||||||
int packedBytesLength = dataDimensions * bytesPerDim;
|
int packedBytesLength = dataDimensions * bytesPerDim;
|
||||||
Directory dir = newDirectory();
|
Directory dir = newDirectory();
|
||||||
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDim, 1000, dir, "test");
|
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDim, 1000, dir, "test");
|
||||||
// we check for each dimension
|
// we check for each dimension
|
||||||
for (int splitDim = 0; splitDim < dataDimensions; splitDim++) {
|
for (int splitDim = 0; splitDim < dataDimensions; splitDim++) {
|
||||||
radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim));
|
radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim));
|
||||||
|
@ -130,6 +130,11 @@ public class TestBKDRadixSort extends LuceneTestCase {
|
||||||
BytesRef value = pointValue.packedValue();
|
BytesRef value = pointValue.packedValue();
|
||||||
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
|
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
|
||||||
assertTrue(cmp >= 0);
|
assertTrue(cmp >= 0);
|
||||||
|
if (cmp == 0) {
|
||||||
|
int dataOffset = indexDimensions * bytesPerDim;
|
||||||
|
cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataOffset, value.offset + packedBytesLength, previous, dataOffset, packedBytesLength);
|
||||||
|
assertTrue(cmp >= 0);
|
||||||
|
}
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
assertTrue(pointValue.docID() >= previousDocId);
|
assertTrue(pointValue.docID() >= previousDocId);
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
|
||||||
private void doTestSort() {
|
private void doTestSort() {
|
||||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||||
Point[] points = createRandomPoints(1, bytesPerDim, maxDoc);
|
Point[] points = createRandomPoints(1, 1, bytesPerDim, maxDoc, new int[1]);
|
||||||
DummyPointsReader reader = new DummyPointsReader(points);
|
DummyPointsReader reader = new DummyPointsReader(points);
|
||||||
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
|
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
|
||||||
Arrays.sort(points, new Comparator<Point>() {
|
Arrays.sort(points, new Comparator<Point>() {
|
||||||
|
@ -62,25 +62,15 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doTestSortByDim() {
|
private void doTestSortByDim() {
|
||||||
final int numDims = TestUtil.nextInt(random(), 1, 8);
|
final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
|
||||||
|
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
|
||||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||||
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
|
int[] commonPrefixLengths = new int[numDataDims];
|
||||||
int[] commonPrefixLengths = new int[numDims];
|
Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
|
||||||
for (int i = 0; i < commonPrefixLengths.length; ++i) {
|
|
||||||
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
|
|
||||||
}
|
|
||||||
BytesRef firstValue = points[0].packedValue;
|
|
||||||
for (int i = 1; i < points.length; ++i) {
|
|
||||||
for (int dim = 0; dim < numDims; ++dim) {
|
|
||||||
int offset = dim * bytesPerDim;
|
|
||||||
BytesRef packedValue = points[i].packedValue;
|
|
||||||
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DummyPointsReader reader = new DummyPointsReader(points);
|
DummyPointsReader reader = new DummyPointsReader(points);
|
||||||
final int sortedDim = random().nextInt(numDims);
|
final int sortedDim = random().nextInt(numIndexDims);
|
||||||
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
|
MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
|
||||||
new BytesRef(), new BytesRef());
|
new BytesRef(), new BytesRef());
|
||||||
for (int i = 1; i < points.length; ++i) {
|
for (int i = 1; i < points.length; ++i) {
|
||||||
final int offset = sortedDim * bytesPerDim;
|
final int offset = sortedDim * bytesPerDim;
|
||||||
|
@ -88,7 +78,13 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
|
||||||
BytesRef currentValue = reader.points[i].packedValue;
|
BytesRef currentValue = reader.points[i].packedValue;
|
||||||
int cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim);
|
int cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim);
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
cmp = reader.points[i - 1].doc - reader.points[i].doc;
|
int dataDimOffset = numIndexDims * bytesPerDim;
|
||||||
|
int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
|
||||||
|
cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + dataDimOffset, previousValue.offset + dataDimOffset + dataDimsLength,
|
||||||
|
currentValue.bytes, currentValue.offset + dataDimOffset, currentValue.offset + dataDimOffset + dataDimsLength);
|
||||||
|
if (cmp == 0) {
|
||||||
|
cmp = reader.points[i - 1].doc - reader.points[i].doc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
assertTrue(cmp <= 0);
|
assertTrue(cmp <= 0);
|
||||||
}
|
}
|
||||||
|
@ -101,29 +97,31 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doTestPartition() {
|
private void doTestPartition() {
|
||||||
final int numDims = TestUtil.nextInt(random(), 1, 8);
|
final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
|
||||||
|
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
|
||||||
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
|
||||||
|
int[] commonPrefixLengths = new int[numDataDims];
|
||||||
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
|
||||||
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc);
|
Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
|
||||||
int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim);
|
final int splitDim = random().nextInt(numIndexDims);
|
||||||
final int splitDim = random().nextInt(numDims);
|
|
||||||
BytesRef firstValue = points[0].packedValue;
|
|
||||||
for (int i = 1; i < points.length; ++i) {
|
|
||||||
BytesRef packedValue = points[i].packedValue;
|
|
||||||
int offset = splitDim * bytesPerDim;
|
|
||||||
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
|
|
||||||
}
|
|
||||||
DummyPointsReader reader = new DummyPointsReader(points);
|
DummyPointsReader reader = new DummyPointsReader(points);
|
||||||
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
|
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
|
||||||
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot,
|
MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLengths[splitDim], reader, 0, points.length, pivot,
|
||||||
new BytesRef(), new BytesRef());
|
new BytesRef(), new BytesRef());
|
||||||
BytesRef pivotValue = reader.points[pivot].packedValue;
|
BytesRef pivotValue = reader.points[pivot].packedValue;
|
||||||
int offset = splitDim * bytesPerDim;
|
int offset = splitDim * bytesPerDim;
|
||||||
for (int i = 0; i < points.length; ++i) {
|
for (int i = 0; i < points.length; ++i) {
|
||||||
BytesRef value = reader.points[i].packedValue;
|
BytesRef value = reader.points[i].packedValue;
|
||||||
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim, pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
|
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim,
|
||||||
|
pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
cmp = reader.points[i].doc - reader.points[pivot].doc;
|
int dataDimOffset = numIndexDims * bytesPerDim;
|
||||||
|
int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
|
||||||
|
cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataDimOffset, value.offset + dataDimOffset + dataDimsLength,
|
||||||
|
pivotValue.bytes, pivotValue.offset + dataDimOffset, pivotValue.offset + dataDimOffset + dataDimsLength);
|
||||||
|
if (cmp == 0) {
|
||||||
|
cmp = reader.points[i].doc - reader.points[pivot].doc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (i < pivot) {
|
if (i < pivot) {
|
||||||
assertTrue(cmp <= 0);
|
assertTrue(cmp <= 0);
|
||||||
|
@ -135,14 +133,54 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) {
|
private static Point[] createRandomPoints(int numDataDims, int numIndexdims, int bytesPerDim, int maxDoc, int[] commonPrefixLengths) {
|
||||||
final int packedBytesLength = numDims * bytesPerDim;
|
assertTrue(commonPrefixLengths.length == numDataDims);
|
||||||
|
final int packedBytesLength = numDataDims * bytesPerDim;
|
||||||
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
|
final int numPoints = TestUtil.nextInt(random(), 1, 100000);
|
||||||
Point[] points = new Point[numPoints];
|
Point[] points = new Point[numPoints];
|
||||||
for (int i = 0; i < numPoints; ++i) {
|
if (random().nextInt(5) != 0) {
|
||||||
byte[] value = new byte[packedBytesLength];
|
for (int i = 0; i < numPoints; ++i) {
|
||||||
random().nextBytes(value);
|
byte[] value = new byte[packedBytesLength];
|
||||||
points[i] = new Point(value, random().nextInt(maxDoc));
|
random().nextBytes(value);
|
||||||
|
points[i] = new Point(value, random().nextInt(maxDoc));
|
||||||
|
}
|
||||||
|
for (int i = 0; i < numDataDims; ++i) {
|
||||||
|
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
|
||||||
|
}
|
||||||
|
BytesRef firstValue = points[0].packedValue;
|
||||||
|
for (int i = 1; i < points.length; ++i) {
|
||||||
|
for (int dim = 0; dim < numDataDims; ++dim) {
|
||||||
|
int offset = dim * bytesPerDim;
|
||||||
|
BytesRef packedValue = points[i].packedValue;
|
||||||
|
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
//index dim are equal, data dims different
|
||||||
|
byte[] indexDims = new byte[numIndexdims * bytesPerDim];
|
||||||
|
random().nextBytes(indexDims);
|
||||||
|
byte[] dataDims = new byte[(numDataDims - numIndexdims) * bytesPerDim];
|
||||||
|
for (int i = 0; i < numPoints; ++i) {
|
||||||
|
byte[] value = new byte[packedBytesLength];
|
||||||
|
System.arraycopy(indexDims, 0, value, 0, numIndexdims * bytesPerDim);
|
||||||
|
random().nextBytes(dataDims);
|
||||||
|
System.arraycopy(dataDims, 0, value, numIndexdims * bytesPerDim, (numDataDims - numIndexdims) * bytesPerDim);
|
||||||
|
points[i] = new Point(value, random().nextInt(maxDoc));
|
||||||
|
}
|
||||||
|
for (int i = 0; i < numIndexdims; ++i) {
|
||||||
|
commonPrefixLengths[i] = bytesPerDim;
|
||||||
|
}
|
||||||
|
for (int i = numIndexdims; i < numDataDims; ++i) { // data dims follow the index dims; starting at numDataDims made this loop a no-op
|
||||||
|
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
|
||||||
|
}
|
||||||
|
BytesRef firstValue = points[0].packedValue;
|
||||||
|
for (int i = 1; i < points.length; ++i) {
|
||||||
|
for (int dim = numIndexdims; dim < numDataDims; ++dim) {
|
||||||
|
int offset = dim * bytesPerDim;
|
||||||
|
BytesRef packedValue = points[i].packedValue;
|
||||||
|
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return points;
|
return points;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue