LUCENE-8888: Improve distribution of points with data dimensions in BKD tree leaves (#747)

This commit is contained in:
Ignacio Vera 2019-07-04 10:50:23 +02:00 committed by iverase
parent 792871c480
commit ef64f7af3e
14 changed files with 443 additions and 205 deletions

View File

@@ -138,6 +138,9 @@ Optimizations

* LUCENE-8901: Load frequencies lazily only when needed in BlockDocsEnum and
  BlockImpactsEverythingEnum (Mayya Sharipova).

* LUCENE-8888: Optimize distribution of points with data dimensions in
  BKD tree leaves. (Ignacio Vera)

Test Framework

* LUCENE-8825: CheckHits now display the shard index in case of mismatch

View File

@ -597,7 +597,7 @@ final class SimpleTextBKDWriter implements Closeable {
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode; assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
//We re-use the selector so we do not need to create an object every time. //We re-use the selector so we do not need to create an object every time.
BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix); BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
boolean success = false; boolean success = false;
try { try {
@ -605,7 +605,7 @@ final class SimpleTextBKDWriter implements Closeable {
build(1, numLeaves, points, out, build(1, numLeaves, points, out,
radixSelector, minPackedValue, maxPackedValue, radixSelector, minPackedValue, maxPackedValue,
splitPackedValues, leafBlockFPs); splitPackedValues, leafBlockFPs, new int[maxPointsInLeafNode]);
// If no exception, we should have cleaned everything up: // If no exception, we should have cleaned everything up:
@ -877,7 +877,7 @@ final class SimpleTextBKDWriter implements Closeable {
} }
// sort by sortedDim // sort by sortedDim
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2); reader, from, to, scratchBytesRef1, scratchBytesRef2);
// Save the block file pointer: // Save the block file pointer:
@ -920,7 +920,7 @@ final class SimpleTextBKDWriter implements Closeable {
break; break;
} }
} }
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2); reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value // set the split value
@ -951,7 +951,8 @@ final class SimpleTextBKDWriter implements Closeable {
BKDRadixSelector radixSelector, BKDRadixSelector radixSelector,
byte[] minPackedValue, byte[] maxPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
byte[] splitPackedValues, byte[] splitPackedValues,
long[] leafBlockFPs) throws IOException { long[] leafBlockFPs,
int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) { if (nodeID >= leafNodeOffset) {
@ -1010,7 +1011,12 @@ final class SimpleTextBKDWriter implements Closeable {
// loading the values: // loading the values:
int count = to - from; int count = to - from;
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset; assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
writeLeafBlockDocs(out, heapSource.docIDs, from, count); // Write doc IDs
int[] docIDs = spareDocIds;
for (int i = 0; i < count; i++) {
docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
}
writeLeafBlockDocs(out, spareDocIds, 0, count);
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
// from the index, much like how terms dict does so from the FST: // from the index, much like how terms dict does so from the FST:
@ -1030,7 +1036,7 @@ final class SimpleTextBKDWriter implements Closeable {
} }
}; };
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
heapSource.docIDs, from); docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues); writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
} else { } else {
@ -1075,12 +1081,12 @@ final class SimpleTextBKDWriter implements Closeable {
// Recurse on left tree: // Recurse on left tree:
build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector, build(2*nodeID, leafNodeOffset, pathSlices[0], out, radixSelector,
minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs); minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
// TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right? // TODO: we could "tail recurse" here? have our parent discard its refs as we recurse right?
// Recurse on right tree: // Recurse on right tree:
build(2*nodeID+1, leafNodeOffset, pathSlices[1], out, radixSelector, build(2*nodeID+1, leafNodeOffset, pathSlices[1], out, radixSelector,
minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs); minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
} }
} }
@ -1132,6 +1138,13 @@ final class SimpleTextBKDWriter implements Closeable {
if (cmp > 0) { if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim); throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord + " sortedDim=" + sortedDim);
} }
if (cmp == 0 && numDataDims > numIndexDims) {
int dataOffset = numIndexDims * bytesPerDim;
cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
if (cmp > 0) {
throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
}
}
if (cmp == 0 && doc < lastDoc) { if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim); throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord + " sortedDim=" + sortedDim);
} }

View File

@ -48,6 +48,8 @@ public final class BKDRadixSelector {
private final int bytesSorted; private final int bytesSorted;
//data dimensions size //data dimensions size
private final int packedBytesLength; private final int packedBytesLength;
// data dimensions plus docID size
private final int packedBytesDocIDLength;
//flag to when we are moving to sort on heap //flag to when we are moving to sort on heap
private final int maxPointsSortInHeap; private final int maxPointsSortInHeap;
//reusable buffer //reusable buffer
@ -60,18 +62,26 @@ public final class BKDRadixSelector {
private final Directory tempDir; private final Directory tempDir;
// prefix for temp files // prefix for temp files
private final String tempFileNamePrefix; private final String tempFileNamePrefix;
// data and index dimensions
private final int numDataDims, numIndexDims;
/** /**
* Sole constructor. * Sole constructor.
*/ */
public BKDRadixSelector(int numDim, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) { public BKDRadixSelector(int numDataDims, int numIndexDims, int bytesPerDim, int maxPointsSortInHeap, Directory tempDir, String tempFileNamePrefix) {
this.bytesPerDim = bytesPerDim; this.bytesPerDim = bytesPerDim;
this.packedBytesLength = numDim * bytesPerDim; this.numDataDims = numDataDims;
this.bytesSorted = bytesPerDim + Integer.BYTES; this.numIndexDims = numIndexDims;
this.packedBytesLength = numDataDims * bytesPerDim;
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
// Selection and sorting is done in a given dimension. In case the value of the dimension are equal
// between two points we tie break first using the data-only dimensions and if those are still equal
// we tie-break on the docID. Here we account for all bytes used in the process.
this.bytesSorted = bytesPerDim + (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
this.maxPointsSortInHeap = maxPointsSortInHeap; this.maxPointsSortInHeap = maxPointsSortInHeap;
int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / (packedBytesLength + Integer.BYTES); int numberOfPointsOffline = MAX_SIZE_OFFLINE_BUFFER / packedBytesDocIDLength;
this.offlineBuffer = new byte[numberOfPointsOffline * (packedBytesLength + Integer.BYTES)]; this.offlineBuffer = new byte[numberOfPointsOffline * packedBytesDocIDLength];
this.partitionBucket = new int[bytesSorted]; this.partitionBucket = new int[bytesSorted];
this.histogram = new long[HISTOGRAM_SIZE]; this.histogram = new long[HISTOGRAM_SIZE];
this.scratch = new byte[bytesSorted]; this.scratch = new byte[bytesSorted];
@ -134,12 +144,12 @@ public final class BKDRadixSelector {
assert commonPrefixPosition > dimCommonPrefix; assert commonPrefixPosition > dimCommonPrefix;
reader.next(); reader.next();
PointValue pointValue = reader.pointValue(); PointValue pointValue = reader.pointValue();
BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
// copy dimension // copy dimension
BytesRef packedValue = pointValue.packedValue(); System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + offset, scratch, 0, bytesPerDim);
System.arraycopy(packedValue.bytes, packedValue.offset + offset, scratch, 0, bytesPerDim); // copy data dimensions and docID
// copy docID System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset + numIndexDims * bytesPerDim, scratch, bytesPerDim, (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES);
BytesRef docIDBytes = pointValue.docIDBytes();
System.arraycopy(docIDBytes.bytes, docIDBytes.offset, scratch, bytesPerDim, Integer.BYTES);
for (long i = from + 1; i < to; i++) { for (long i = from + 1; i < to; i++) {
reader.next(); reader.next();
pointValue = reader.pointValue(); pointValue = reader.pointValue();
@ -157,13 +167,15 @@ public final class BKDRadixSelector {
//check common prefix and adjust histogram //check common prefix and adjust histogram
final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix; final int startIndex = (dimCommonPrefix > bytesPerDim) ? bytesPerDim : dimCommonPrefix;
final int endIndex = (commonPrefixPosition > bytesPerDim) ? bytesPerDim : commonPrefixPosition; final int endIndex = (commonPrefixPosition > bytesPerDim) ? bytesPerDim : commonPrefixPosition;
packedValue = pointValue.packedValue(); packedValueDocID = pointValue.packedValueDocIDBytes();
int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValue.bytes, packedValue.offset + offset + startIndex, packedValue.offset + offset + endIndex); int j = FutureArrays.mismatch(scratch, startIndex, endIndex, packedValueDocID.bytes, packedValueDocID.offset + offset + startIndex, packedValueDocID.offset + offset + endIndex);
if (j == -1) { if (j == -1) {
if (commonPrefixPosition > bytesPerDim) { if (commonPrefixPosition > bytesPerDim) {
//tie-break on docID //tie-break on data dimensions + docID
docIDBytes = pointValue.docIDBytes(); final int startTieBreak = numIndexDims * bytesPerDim;
int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition, docIDBytes.bytes, docIDBytes.offset, docIDBytes.offset + commonPrefixPosition - bytesPerDim); final int endTieBreak = startTieBreak + commonPrefixPosition - bytesPerDim;
int k = FutureArrays.mismatch(scratch, bytesPerDim, commonPrefixPosition,
packedValueDocID.bytes, packedValueDocID.offset + startTieBreak, packedValueDocID.offset + endTieBreak);
if (k != -1) { if (k != -1) {
commonPrefixPosition = bytesPerDim + k; commonPrefixPosition = bytesPerDim + k;
Arrays.fill(histogram, 0); Arrays.fill(histogram, 0);
@ -195,8 +207,8 @@ public final class BKDRadixSelector {
BytesRef packedValue = pointValue.packedValue(); BytesRef packedValue = pointValue.packedValue();
bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff; bucket = packedValue.bytes[packedValue.offset + offset + commonPrefixPosition] & 0xff;
} else { } else {
BytesRef docIDValue = pointValue.docIDBytes(); BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
bucket = docIDValue.bytes[docIDValue.offset + commonPrefixPosition - bytesPerDim] & 0xff; bucket = packedValueDocID.bytes[packedValueDocID.offset + numIndexDims * bytesPerDim + commonPrefixPosition - bytesPerDim] & 0xff;
} }
return bucket; return bucket;
} }
@ -310,10 +322,11 @@ public final class BKDRadixSelector {
return partition; return partition;
} }
private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefix) { private byte[] heapRadixSelect(HeapPointWriter points, int dim, int from, int to, int partitionPoint, int commonPrefixLength) {
final int offset = dim * bytesPerDim + commonPrefix; final int dimOffset = dim * bytesPerDim + commonPrefixLength;
final int dimCmpBytes = bytesPerDim - commonPrefix; final int dimCmpBytes = bytesPerDim - commonPrefixLength;
new RadixSelector(bytesSorted - commonPrefix) { final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
new RadixSelector(bytesSorted - commonPrefixLength) {
@Override @Override
protected void swap(int i, int j) { protected void swap(int i, int j) {
@ -325,23 +338,23 @@ public final class BKDRadixSelector {
assert k >= 0 : "negative prefix " + k; assert k >= 0 : "negative prefix " + k;
if (k < dimCmpBytes) { if (k < dimCmpBytes) {
// dim bytes // dim bytes
return points.block[i * packedBytesLength + offset + k] & 0xff; return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
} else { } else {
// doc id // data bytes
int s = 3 - (k - dimCmpBytes); return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
return (points.docIDs[i] >>> (s * 8)) & 0xff;
} }
} }
@Override @Override
protected Selector getFallbackSelector(int d) { protected Selector getFallbackSelector(int d) {
int skypedBytes = d + commonPrefix; final int skypedBytes = d + commonPrefixLength;
final int start = dim * bytesPerDim + skypedBytes; final int dimStart = dim * bytesPerDim + skypedBytes;
final int end = dim * bytesPerDim + bytesPerDim; final int dimEnd = dim * bytesPerDim + bytesPerDim;
final int dataOffset = numIndexDims * bytesPerDim;
// data length is composed by the data dimensions plus the docID
final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
return new IntroSelector() { return new IntroSelector() {
int pivotDoc = -1;
@Override @Override
protected void swap(int i, int j) { protected void swap(int i, int j) {
points.swap(i, j); points.swap(i, j);
@ -350,36 +363,37 @@ public final class BKDRadixSelector {
@Override @Override
protected void setPivot(int i) { protected void setPivot(int i) {
if (skypedBytes < bytesPerDim) { if (skypedBytes < bytesPerDim) {
System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim); System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
} }
pivotDoc = points.docIDs[i]; System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
} }
@Override @Override
protected int compare(int i, int j) { protected int compare(int i, int j) {
if (skypedBytes < bytesPerDim) { if (skypedBytes < bytesPerDim) {
int iOffset = i * packedBytesLength; int iOffset = i * packedBytesDocIDLength;
int jOffset = j * packedBytesLength; int jOffset = j * packedBytesDocIDLength;
int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end, int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
points.block, jOffset + start, jOffset + end);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
} }
return points.docIDs[i] - points.docIDs[j]; int iOffset = i * packedBytesDocIDLength + dataOffset;
int jOffset = j * packedBytesDocIDLength + dataOffset;
return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
} }
@Override @Override
protected int comparePivot(int j) { protected int comparePivot(int j) {
if (skypedBytes < bytesPerDim) { if (skypedBytes < bytesPerDim) {
int jOffset = j * packedBytesLength; int jOffset = j * packedBytesDocIDLength;
int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
points.block, jOffset + start, jOffset + end);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
} }
return pivotDoc - points.docIDs[j]; int jOffset = j * packedBytesDocIDLength + dataOffset;
return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
} }
}; };
} }
@ -394,8 +408,9 @@ public final class BKDRadixSelector {
/** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */ /** Sort the heap writer by the specified dim. It is used to sort the leaves of the tree */
public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) { public void heapRadixSort(final HeapPointWriter points, int from, int to, int dim, int commonPrefixLength) {
final int offset = dim * bytesPerDim + commonPrefixLength; final int dimOffset = dim * bytesPerDim + commonPrefixLength;
final int dimCmpBytes = bytesPerDim - commonPrefixLength; final int dimCmpBytes = bytesPerDim - commonPrefixLength;
final int dataOffset = numIndexDims * bytesPerDim - dimCmpBytes;
new MSBRadixSorter(bytesSorted - commonPrefixLength) { new MSBRadixSorter(bytesSorted - commonPrefixLength) {
@Override @Override
@ -403,11 +418,10 @@ public final class BKDRadixSelector {
assert k >= 0 : "negative prefix " + k; assert k >= 0 : "negative prefix " + k;
if (k < dimCmpBytes) { if (k < dimCmpBytes) {
// dim bytes // dim bytes
return points.block[i * packedBytesLength + offset + k] & 0xff; return points.block[i * packedBytesDocIDLength + dimOffset + k] & 0xff;
} else { } else {
// doc id // data bytes
int s = 3 - (k - dimCmpBytes); return points.block[i * packedBytesDocIDLength + dataOffset + k] & 0xff;
return (points.docIDs[i] >>> (s * 8)) & 0xff;
} }
} }
@ -418,13 +432,14 @@ public final class BKDRadixSelector {
@Override @Override
protected Sorter getFallbackSorter(int k) { protected Sorter getFallbackSorter(int k) {
int skypedBytes = k + commonPrefixLength; final int skypedBytes = k + commonPrefixLength;
final int start = dim * bytesPerDim + skypedBytes; final int dimStart = dim * bytesPerDim + skypedBytes;
final int end = dim * bytesPerDim + bytesPerDim; final int dimEnd = dim * bytesPerDim + bytesPerDim;
final int dataOffset = numIndexDims * bytesPerDim;
// data length is composed by the data dimensions plus the docID
final int dataLength = (numDataDims - numIndexDims) * bytesPerDim + Integer.BYTES;
return new IntroSorter() { return new IntroSorter() {
int pivotDoc = -1;
@Override @Override
protected void swap(int i, int j) { protected void swap(int i, int j) {
points.swap(i, j); points.swap(i, j);
@ -433,36 +448,37 @@ public final class BKDRadixSelector {
@Override @Override
protected void setPivot(int i) { protected void setPivot(int i) {
if (skypedBytes < bytesPerDim) { if (skypedBytes < bytesPerDim) {
System.arraycopy(points.block, i * packedBytesLength + dim * bytesPerDim, scratch, 0, bytesPerDim); System.arraycopy(points.block, i * packedBytesDocIDLength + dim * bytesPerDim, scratch, 0, bytesPerDim);
} }
pivotDoc = points.docIDs[i]; System.arraycopy(points.block, i * packedBytesDocIDLength + dataOffset, scratch, bytesPerDim, dataLength);
} }
@Override @Override
protected int compare(int i, int j) { protected int compare(int i, int j) {
if (skypedBytes < bytesPerDim) { if (skypedBytes < bytesPerDim) {
int iOffset = i * packedBytesLength; int iOffset = i * packedBytesDocIDLength;
int jOffset = j * packedBytesLength; int jOffset = j * packedBytesDocIDLength;
int cmp = FutureArrays.compareUnsigned(points.block, iOffset + start, iOffset + end, int cmp = FutureArrays.compareUnsigned(points.block, iOffset + dimStart, iOffset + dimEnd, points.block, jOffset + dimStart, jOffset + dimEnd);
points.block, jOffset + start, jOffset + end);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
} }
return points.docIDs[i] - points.docIDs[j]; int iOffset = i * packedBytesDocIDLength + dataOffset;
int jOffset = j * packedBytesDocIDLength + dataOffset;
return FutureArrays.compareUnsigned(points.block, iOffset, iOffset + dataLength, points.block, jOffset, jOffset + dataLength);
} }
@Override @Override
protected int comparePivot(int j) { protected int comparePivot(int j) {
if (skypedBytes < bytesPerDim) { if (skypedBytes < bytesPerDim) {
int jOffset = j * packedBytesLength; int jOffset = j * packedBytesDocIDLength;
int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, int cmp = FutureArrays.compareUnsigned(scratch, skypedBytes, bytesPerDim, points.block, jOffset + dimStart, jOffset + dimEnd);
points.block, jOffset + start, jOffset + end);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
} }
return pivotDoc - points.docIDs[j]; int jOffset = j * packedBytesDocIDLength + dataOffset;
return FutureArrays.compareUnsigned(scratch, bytesPerDim, bytesPerDim + dataLength, points.block, jOffset, jOffset + dataLength);
} }
}; };
} }

View File

@ -776,7 +776,7 @@ public class BKDWriter implements Closeable {
assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode; assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
//We re-use the selector so we do not need to create an object every time. //We re-use the selector so we do not need to create an object every time.
BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix); BKDRadixSelector radixSelector = new BKDRadixSelector(numDataDims, numIndexDims, bytesPerDim, maxPointsSortInHeap, tempDir, tempFileNamePrefix);
boolean success = false; boolean success = false;
try { try {
@ -787,7 +787,8 @@ public class BKDWriter implements Closeable {
minPackedValue, maxPackedValue, minPackedValue, maxPackedValue,
parentSplits, parentSplits,
splitPackedValues, splitPackedValues,
leafBlockFPs); leafBlockFPs,
new int[maxPointsInLeafNode]);
assert Arrays.equals(parentSplits, new int[numIndexDims]); assert Arrays.equals(parentSplits, new int[numIndexDims]);
// If no exception, we should have cleaned everything up: // If no exception, we should have cleaned everything up:
@ -1366,7 +1367,7 @@ public class BKDWriter implements Closeable {
} }
// sort by sortedDim // sort by sortedDim
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths,
reader, from, to, scratchBytesRef1, scratchBytesRef2); reader, from, to, scratchBytesRef1, scratchBytesRef2);
BytesRef comparator = scratchBytesRef1; BytesRef comparator = scratchBytesRef1;
@ -1435,7 +1436,7 @@ public class BKDWriter implements Closeable {
commonPrefixLen = bytesPerDim; commonPrefixLen = bytesPerDim;
} }
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLen,
reader, from, to, mid, scratchBytesRef1, scratchBytesRef2); reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
// set the split value // set the split value
@ -1472,7 +1473,8 @@ public class BKDWriter implements Closeable {
byte[] minPackedValue, byte[] maxPackedValue, byte[] minPackedValue, byte[] maxPackedValue,
int[] parentSplits, int[] parentSplits,
byte[] splitPackedValues, byte[] splitPackedValues,
long[] leafBlockFPs) throws IOException { long[] leafBlockFPs,
int[] spareDocIds) throws IOException {
if (nodeID >= leafNodeOffset) { if (nodeID >= leafNodeOffset) {
@ -1532,7 +1534,13 @@ public class BKDWriter implements Closeable {
// loading the values: // loading the values:
int count = to - from; int count = to - from;
assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset; assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
writeLeafBlockDocs(out, heapSource.docIDs, from, count); assert count <= spareDocIds.length : "count=" + count + " > length=" + spareDocIds.length;
// Write doc IDs
int[] docIDs = spareDocIds;
for (int i = 0; i < count; i++) {
docIDs[i] = heapSource.getPackedValueSlice(from + i).docID();
}
writeLeafBlockDocs(out, docIDs, 0, count);
// TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
// from the index, much like how terms dict does so from the FST: // from the index, much like how terms dict does so from the FST:
@ -1555,7 +1563,7 @@ public class BKDWriter implements Closeable {
} }
}; };
assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues, assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
heapSource.docIDs, from); docIDs, 0);
writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality); writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues, leafCardinality);
} else { } else {
@ -1602,12 +1610,12 @@ public class BKDWriter implements Closeable {
// Recurse on left tree: // Recurse on left tree:
build(2 * nodeID, leafNodeOffset, slices[0], build(2 * nodeID, leafNodeOffset, slices[0],
out, radixSelector, minPackedValue, maxSplitPackedValue, out, radixSelector, minPackedValue, maxSplitPackedValue,
parentSplits, splitPackedValues, leafBlockFPs); parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
// Recurse on right tree: // Recurse on right tree:
build(2 * nodeID + 1, leafNodeOffset, slices[1], build(2 * nodeID + 1, leafNodeOffset, slices[1],
out, radixSelector, minSplitPackedValue, maxPackedValue out, radixSelector, minSplitPackedValue, maxPackedValue
, parentSplits, splitPackedValues, leafBlockFPs); , parentSplits, splitPackedValues, leafBlockFPs, spareDocIds);
parentSplits[splitDim]--; parentSplits[splitDim]--;
} }
@ -1661,6 +1669,13 @@ public class BKDWriter implements Closeable {
if (cmp > 0) { if (cmp > 0) {
throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord); throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
} }
if (cmp == 0 && numDataDims > numIndexDims) {
int dataOffset = numIndexDims * bytesPerDim;
cmp = FutureArrays.compareUnsigned(lastPackedValue, dataOffset, packedBytesLength, packedValue, packedValueOffset + dataOffset, packedValueOffset + packedBytesLength);
if (cmp > 0) {
throw new AssertionError("data values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
}
}
if (cmp == 0 && doc < lastDoc) { if (cmp == 0 && doc < lastDoc) {
throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord); throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
} }

View File

@ -27,16 +27,16 @@ public final class HeapPointReader implements PointReader {
private int curRead; private int curRead;
final byte[] block; final byte[] block;
final int packedBytesLength; final int packedBytesLength;
final int[] docIDs; final int packedBytesDocIDLength;
final int end; final int end;
private final HeapPointValue pointValue; private final HeapPointValue pointValue;
public HeapPointReader(byte[] block, int packedBytesLength, int[] docIDs, int start, int end) { public HeapPointReader(byte[] block, int packedBytesLength, int start, int end) {
this.block = block; this.block = block;
this.docIDs = docIDs;
curRead = start-1; curRead = start-1;
this.end = end; this.end = end;
this.packedBytesLength = packedBytesLength; this.packedBytesLength = packedBytesLength;
this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
if (start < end) { if (start < end) {
this.pointValue = new HeapPointValue(block, packedBytesLength); this.pointValue = new HeapPointValue(block, packedBytesLength);
} else { } else {
@ -53,7 +53,7 @@ public final class HeapPointReader implements PointReader {
@Override @Override
public PointValue pointValue() { public PointValue pointValue() {
pointValue.setValue(curRead * packedBytesLength, docIDs[curRead]); pointValue.setOffset(curRead * packedBytesDocIDLength);
return pointValue; return pointValue;
} }
@ -66,21 +66,22 @@ public final class HeapPointReader implements PointReader {
*/ */
static class HeapPointValue implements PointValue { static class HeapPointValue implements PointValue {
BytesRef packedValue; final BytesRef packedValue;
BytesRef docIDBytes; final BytesRef packedValueDocID;
int docID; final int packedValueLength;
public HeapPointValue(byte[] value, int packedLength) { HeapPointValue(byte[] value, int packedValueLength) {
packedValue = new BytesRef(value, 0, packedLength); this.packedValueLength = packedValueLength;
docIDBytes = new BytesRef(new byte[4]); this.packedValue = new BytesRef(value, 0, packedValueLength);
this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
} }
/** /**
* Sets a new value by changing the offset and docID. * Sets a new value by changing the offset.
*/ */
public void setValue(int offset, int docID) { public void setOffset(int offset) {
this.docID = docID;
packedValue.offset = offset; packedValue.offset = offset;
packedValueDocID.offset = offset;
} }
@Override @Override
@ -90,16 +91,14 @@ public final class HeapPointReader implements PointReader {
@Override @Override
public int docID() { public int docID() {
return docID; int position = packedValueDocID.offset + packedValueLength;
return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
| ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
} }
@Override @Override
public BytesRef docIDBytes() { public BytesRef packedValueDocIDBytes() {
docIDBytes.bytes[0] = (byte) (docID >> 24); return packedValueDocID;
docIDBytes.bytes[1] = (byte) (docID >> 16);
docIDBytes.bytes[2] = (byte) (docID >> 8);
docIDBytes.bytes[3] = (byte) (docID >> 0);
return docIDBytes;
} }
} }
} }

View File

@ -25,10 +25,10 @@ import org.apache.lucene.util.FutureArrays;
* @lucene.internal * @lucene.internal
* */ * */
public final class HeapPointWriter implements PointWriter { public final class HeapPointWriter implements PointWriter {
public final int[] docIDs;
public final byte[] block; public final byte[] block;
final int size; final int size;
final int packedBytesLength; final int packedBytesLength;
final int packedBytesDocIDLength;
private final byte[] scratch; private final byte[] scratch;
private int nextWrite; private int nextWrite;
private boolean closed; private boolean closed;
@ -37,11 +37,11 @@ public final class HeapPointWriter implements PointWriter {
public HeapPointWriter(int size, int packedBytesLength) { public HeapPointWriter(int size, int packedBytesLength) {
this.docIDs = new int[size]; this.packedBytesDocIDLength = packedBytesLength + Integer.BYTES;
this.block = new byte[packedBytesLength * size];
this.size = size;
this.packedBytesLength = packedBytesLength; this.packedBytesLength = packedBytesLength;
this.scratch = new byte[packedBytesLength]; this.block = new byte[packedBytesDocIDLength * size];
this.size = size;
this.scratch = new byte[packedBytesDocIDLength];
if (size > 0) { if (size > 0) {
pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength); pointValue = new HeapPointReader.HeapPointValue(block, packedBytesLength);
} else { } else {
@ -53,7 +53,7 @@ public final class HeapPointWriter implements PointWriter {
/** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */ /** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */
public PointValue getPackedValueSlice(int index) { public PointValue getPackedValueSlice(int index) {
assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index; assert index < nextWrite : "nextWrite=" + (nextWrite) + " vs index=" + index;
pointValue.setValue(index * packedBytesLength, docIDs[index]); pointValue.setOffset(index * packedBytesDocIDLength);
return pointValue; return pointValue;
} }
@ -62,8 +62,12 @@ public final class HeapPointWriter implements PointWriter {
assert closed == false : "point writer is already closed"; assert closed == false : "point writer is already closed";
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]"; assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]";
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size; assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
System.arraycopy(packedValue, 0, block, nextWrite * packedBytesLength, packedBytesLength); System.arraycopy(packedValue, 0, block, nextWrite * packedBytesDocIDLength, packedBytesLength);
docIDs[nextWrite] = docID; int position = nextWrite * packedBytesDocIDLength + packedBytesLength;
block[position] = (byte) (docID >> 24);
block[++position] = (byte) (docID >> 16);
block[++position] = (byte) (docID >> 8);
block[++position] = (byte) (docID >> 0);
nextWrite++; nextWrite++;
} }
@ -71,27 +75,23 @@ public final class HeapPointWriter implements PointWriter {
public void append(PointValue pointValue) { public void append(PointValue pointValue) {
assert closed == false : "point writer is already closed"; assert closed == false : "point writer is already closed";
assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size; assert nextWrite < size : "nextWrite=" + (nextWrite + 1) + " vs size=" + size;
BytesRef packedValue = pointValue.packedValue(); BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + (packedBytesLength) + "] but was [" + packedValue.length + "]"; assert packedValueDocID.length == packedBytesDocIDLength : "[packedValue] must have length [" + (packedBytesDocIDLength) + "] but was [" + packedValueDocID.length + "]";
System.arraycopy(packedValue.bytes, packedValue.offset, block, nextWrite * packedBytesLength, packedBytesLength); System.arraycopy(packedValueDocID.bytes, packedValueDocID.offset, block, nextWrite * packedBytesDocIDLength, packedBytesDocIDLength);
docIDs[nextWrite] = pointValue.docID();
nextWrite++; nextWrite++;
} }
public void swap(int i, int j) { public void swap(int i, int j) {
int docID = docIDs[i];
docIDs[i] = docIDs[j];
docIDs[j] = docID;
int indexI = i * packedBytesLength; int indexI = i * packedBytesDocIDLength;
int indexJ = j * packedBytesLength; int indexJ = j * packedBytesDocIDLength;
// scratch1 = values[i] // scratch1 = values[i]
System.arraycopy(block, indexI, scratch, 0, packedBytesLength); System.arraycopy(block, indexI, scratch, 0, packedBytesDocIDLength);
// values[i] = values[j] // values[i] = values[j]
System.arraycopy(block, indexJ, block, indexI, packedBytesLength); System.arraycopy(block, indexJ, block, indexI, packedBytesDocIDLength);
// values[j] = scratch1 // values[j] = scratch1
System.arraycopy(scratch, 0, block, indexJ, packedBytesLength); System.arraycopy(scratch, 0, block, indexJ, packedBytesDocIDLength);
} }
public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) { public int computeCardinality(int from, int to, int numDataDims, int bytesPerDim, int[] commonPrefixLengths) {
@ -101,8 +101,8 @@ public final class HeapPointWriter implements PointWriter {
for (int dim = 0; dim < numDataDims; dim++) { for (int dim = 0; dim < numDataDims; dim++) {
final int start = dim * bytesPerDim + commonPrefixLengths[dim]; final int start = dim * bytesPerDim + commonPrefixLengths[dim];
final int end = dim * bytesPerDim + bytesPerDim; final int end = dim * bytesPerDim + bytesPerDim;
if (FutureArrays.mismatch(block, i * packedBytesLength + start, i * packedBytesLength + end, if (FutureArrays.mismatch(block, i * packedBytesDocIDLength + start, i * packedBytesDocIDLength + end,
block, (i - 1) * packedBytesLength + start, (i - 1) * packedBytesLength + end) != -1) { block, (i - 1) * packedBytesDocIDLength + start, (i - 1) * packedBytesDocIDLength + end) != -1) {
leafCardinality++; leafCardinality++;
break; break;
} }
@ -119,9 +119,9 @@ public final class HeapPointWriter implements PointWriter {
@Override @Override
public PointReader getReader(long start, long length) { public PointReader getReader(long start, long length) {
assert closed : "point writer is still open and trying to get a reader"; assert closed : "point writer is still open and trying to get a reader";
assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length; assert start + length <= size: "start=" + start + " length=" + length + " docIDs.length=" + size;
assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite; assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite;
return new HeapPointReader(block, packedBytesLength, docIDs, (int) start, Math.toIntExact(start+length)); return new HeapPointReader(block, packedBytesLength, (int) start, Math.toIntExact(start+length));
} }
@Override @Override
@ -135,6 +135,6 @@ public final class HeapPointWriter implements PointWriter {
@Override @Override
public String toString() { public String toString() {
return "HeapPointWriter(count=" + nextWrite + " size=" + docIDs.length + ")"; return "HeapPointWriter(count=" + nextWrite + " size=" + size + ")";
} }
} }

View File

@ -77,7 +77,8 @@ public final class MutablePointsReaderUtils {
protected int comparePivot(int j) { protected int comparePivot(int j) {
if (k < packedBytesLength) { if (k < packedBytesLength) {
reader.getValue(j, scratch); reader.getValue(j, scratch);
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k, scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k); int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + packedBytesLength - k,
scratch.bytes, scratch.offset + k, scratch.offset + k + packedBytesLength - k);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
@ -91,14 +92,16 @@ public final class MutablePointsReaderUtils {
} }
/** Sort points on the given dimension. */ /** Sort points on the given dimension. */
public static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths, public static void sortByDim(int numDataDim, int numIndexDim, int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
MutablePointValues reader, int from, int to, MutablePointValues reader, int from, int to,
BytesRef scratch1, BytesRef scratch2) { BytesRef scratch1, BytesRef scratch2) {
final int start = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
final int dimEnd = sortedDim * bytesPerDim + bytesPerDim;
final int dataStart = numIndexDim * bytesPerDim;
final int dataEnd = dataStart + (numDataDim - numIndexDim) * bytesPerDim;
// No need for a fancy radix sort here, this is called on the leaves only so // No need for a fancy radix sort here, this is called on the leaves only so
// there are not many values to sort // there are not many values to sort
final int offset = sortedDim * bytesPerDim + commonPrefixLengths[sortedDim];
final int numBytesToCompare = bytesPerDim - commonPrefixLengths[sortedDim];
new IntroSorter() { new IntroSorter() {
final BytesRef pivot = scratch1; final BytesRef pivot = scratch1;
@ -118,9 +121,14 @@ public final class MutablePointsReaderUtils {
@Override @Override
protected int comparePivot(int j) { protected int comparePivot(int j) {
reader.getValue(j, scratch2); reader.getValue(j, scratch2);
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset, pivot.offset + offset + numBytesToCompare, scratch2.bytes, scratch2.offset + offset, scratch2.offset + offset + numBytesToCompare); int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + start, pivot.offset + dimEnd, scratch2.bytes,
scratch2.offset + start, scratch2.offset + dimEnd);
if (cmp == 0) { if (cmp == 0) {
cmp = pivotDoc - reader.getDocID(j); cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
if (cmp == 0) {
cmp = pivotDoc - reader.getDocID(j);
}
} }
return cmp; return cmp;
} }
@ -130,16 +138,20 @@ public final class MutablePointsReaderUtils {
/** Partition points around {@code mid}. All values on the left must be less /** Partition points around {@code mid}. All values on the left must be less
* than or equal to it and all values on the right must be greater than or * than or equal to it and all values on the right must be greater than or
* equal to it. */ * equal to it. */
public static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen, public static void partition(int numDataDim, int numIndexDim, int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
MutablePointValues reader, int from, int to, int mid, MutablePointValues reader, int from, int to, int mid,
BytesRef scratch1, BytesRef scratch2) { BytesRef scratch1, BytesRef scratch2) {
final int offset = splitDim * bytesPerDim + commonPrefixLen; final int dimOffset = splitDim * bytesPerDim + commonPrefixLen;
final int cmpBytes = bytesPerDim - commonPrefixLen; final int dimCmpBytes = bytesPerDim - commonPrefixLen;
final int dataOffset = numIndexDim * bytesPerDim;
final int dataCmpBytes = (numDataDim - numIndexDim) * bytesPerDim + dimCmpBytes;
final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1); final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
new RadixSelector(cmpBytes + (bitsPerDocId + 7) / 8) { new RadixSelector(dataCmpBytes + (bitsPerDocId + 7) / 8) {
@Override @Override
protected Selector getFallbackSelector(int k) { protected Selector getFallbackSelector(int k) {
final int dataStart = (k < dimCmpBytes) ? dataOffset : dataOffset + k - dimCmpBytes;
final int dataEnd = numDataDim * bytesPerDim;
return new IntroSelector() { return new IntroSelector() {
final BytesRef pivot = scratch1; final BytesRef pivot = scratch1;
@ -158,9 +170,18 @@ public final class MutablePointsReaderUtils {
@Override @Override
protected int comparePivot(int j) { protected int comparePivot(int j) {
if (k < cmpBytes) { if (k < dimCmpBytes) {
reader.getValue(j, scratch2); reader.getValue(j, scratch2);
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + offset + k, pivot.offset + offset + k + cmpBytes - k, scratch2.bytes, scratch2.offset + offset + k, scratch2.offset + offset + k + cmpBytes - k); int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dimOffset + k, pivot.offset + dimOffset + dimCmpBytes,
scratch2.bytes, scratch2.offset + dimOffset + k, scratch2.offset + dimOffset + dimCmpBytes);
if (cmp != 0) {
return cmp;
}
}
if (k < dataCmpBytes) {
reader.getValue(j, scratch2);
int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + dataStart, pivot.offset + dataEnd,
scratch2.bytes, scratch2.offset + dataStart, scratch2.offset + dataEnd);
if (cmp != 0) { if (cmp != 0) {
return cmp; return cmp;
} }
@ -177,10 +198,12 @@ public final class MutablePointsReaderUtils {
@Override @Override
protected int byteAt(int i, int k) { protected int byteAt(int i, int k) {
if (k < cmpBytes) { if (k < dimCmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, offset + k)); return Byte.toUnsignedInt(reader.getByteAt(i, dimOffset + k));
} else if (k < dataCmpBytes) {
return Byte.toUnsignedInt(reader.getByteAt(i, dataOffset + k - dimCmpBytes));
} else { } else {
final int shift = bitsPerDocId - ((k - cmpBytes + 1) << 3); final int shift = bitsPerDocId - ((k - dataCmpBytes + 1) << 3);
return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff; return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
} }
} }

View File

@ -137,12 +137,14 @@ public final class OfflinePointReader implements PointReader {
*/ */
static class OfflinePointValue implements PointValue { static class OfflinePointValue implements PointValue {
BytesRef packedValue; final BytesRef packedValue;
BytesRef docIDBytes; final BytesRef packedValueDocID;
final int packedValueLength;
OfflinePointValue(byte[] value, int packedValueLength) { OfflinePointValue(byte[] value, int packedValueLength) {
packedValue = new BytesRef(value, 0, packedValueLength); this.packedValueLength = packedValueLength;
docIDBytes = new BytesRef(value, packedValueLength, Integer.BYTES); this.packedValue = new BytesRef(value, 0, packedValueLength);
this.packedValueDocID = new BytesRef(value, 0, packedValueLength + Integer.BYTES);
} }
/** /**
@ -150,7 +152,7 @@ public final class OfflinePointReader implements PointReader {
*/ */
public void setOffset(int offset) { public void setOffset(int offset) {
packedValue.offset = offset; packedValue.offset = offset;
docIDBytes.offset = offset + packedValue.length; packedValueDocID.offset = offset;
} }
@Override @Override
@ -160,14 +162,14 @@ public final class OfflinePointReader implements PointReader {
@Override @Override
public int docID() { public int docID() {
int position =docIDBytes.offset; int position = packedValueDocID.offset + packedValueLength;
return ((docIDBytes.bytes[position] & 0xFF) << 24) | ((docIDBytes.bytes[++position] & 0xFF) << 16) return ((packedValueDocID.bytes[position] & 0xFF) << 24) | ((packedValueDocID.bytes[++position] & 0xFF) << 16)
| ((docIDBytes.bytes[++position] & 0xFF) << 8) | (docIDBytes.bytes[++position] & 0xFF); | ((packedValueDocID.bytes[++position] & 0xFF) << 8) | (packedValueDocID.bytes[++position] & 0xFF);
} }
@Override @Override
public BytesRef docIDBytes() { public BytesRef packedValueDocIDBytes() {
return docIDBytes; return packedValueDocID;
} }
} }

View File

@ -62,12 +62,9 @@ public final class OfflinePointWriter implements PointWriter {
@Override @Override
public void append(PointValue pointValue) throws IOException { public void append(PointValue pointValue) throws IOException {
assert closed == false : "Point writer is already closed"; assert closed == false : "Point writer is already closed";
BytesRef packedValue = pointValue.packedValue(); BytesRef packedValueDocID = pointValue.packedValueDocIDBytes();
assert packedValue.length == packedBytesLength : "[packedValue] must have length [" + packedBytesLength + "] but was [" + packedValue.length + "]"; assert packedValueDocID.length == packedBytesLength + Integer.BYTES : "[packedValue and docID] must have length [" + (packedBytesLength + Integer.BYTES) + "] but was [" + packedValueDocID.length + "]";
out.writeBytes(packedValue.bytes, packedValue.offset, packedValue.length); out.writeBytes(packedValueDocID.bytes, packedValueDocID.offset, packedValueDocID.length);
BytesRef docIDBytes = pointValue.docIDBytes();
assert docIDBytes.length == Integer.BYTES : "[docIDBytes] must have length [" + Integer.BYTES + "] but was [" + docIDBytes.length + "]";
out.writeBytes(docIDBytes.bytes, docIDBytes.offset, docIDBytes.length);
count++; count++;
assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count; assert expectedCount == 0 || count <= expectedCount : "expectedCount=" + expectedCount + " vs count=" + count;
} }

View File

@ -28,10 +28,11 @@ public interface PointValue {
/** Returns the packed values for the dimensions */ /** Returns the packed values for the dimensions */
BytesRef packedValue(); BytesRef packedValue();
/** Returns the document id */ /** Returns the docID */
int docID(); int docID();
/** Returns the byte representation of the document id */ /** Returns the byte representation of the packed value
BytesRef docIDBytes(); * together with the docID */
BytesRef packedValueDocIDBytes();
} }

View File

@ -1098,6 +1098,80 @@ public class TestBKD extends LuceneTestCase {
} }
} }
public void testCheckDataDimOptimalOrder() throws IOException {
Directory dir = newDirectory();
final int numValues = atLeast(5000);
final int maxPointsInLeafNode = TestUtil.nextInt(random(), 50, 500);
final int numBytesPerDim = TestUtil.nextInt(random(), 1, 4);
final double maxMB = (float) 3.0 + (3*random().nextDouble());
final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final byte[] pointValue1 = new byte[numDataDims * numBytesPerDim];
final byte[] pointValue2 = new byte[numDataDims * numBytesPerDim];
random().nextBytes(pointValue1);
random().nextBytes(pointValue2);
// equal index dimensions but different data dimensions
for (int i = 0; i < numIndexDims; i++) {
System.arraycopy(pointValue1, i * numBytesPerDim, pointValue2, i * numBytesPerDim, numBytesPerDim);
}
BKDWriter w = new BKDWriter(2 * numValues, dir, "_temp", numDataDims, numIndexDims, numBytesPerDim, maxPointsInLeafNode,
maxMB, 2 * numValues);
for (int i = 0; i < numValues; ++i) {
w.add(pointValue1, i);
w.add(pointValue2, i);
}
final long indexFP;
try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
indexFP = w.finish(out);
w.close();
}
IndexInput pointsIn = dir.openInput("bkd", IOContext.DEFAULT);
pointsIn.seek(indexFP);
BKDReader points = new BKDReader(pointsIn);
points.intersect(new IntersectVisitor() {
byte[] previous = null;
boolean hasChanged = false;
@Override
public void visit(int docID) {
throw new UnsupportedOperationException();
}
@Override
public void visit(int docID, byte[] packedValue) {
if (previous == null) {
previous = new byte[numDataDims * numBytesPerDim];
System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
} else {
int mismatch = FutureArrays.mismatch(packedValue, 0, numDataDims * numBytesPerDim, previous, 0, numDataDims * numBytesPerDim);
if (mismatch != -1) {
if (hasChanged == false) {
hasChanged = true;
System.arraycopy(packedValue, 0, previous, 0, numDataDims * numBytesPerDim);
} else {
fail("Points are not in optimal order");
}
}
}
}
@Override
public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
return Relation.CELL_CROSSES_QUERY;
}
});
pointsIn.close();
dir.close();
}
public void test2DLongOrdsOffline() throws Exception { public void test2DLongOrdsOffline() throws Exception {
try (Directory dir = newDirectory()) { try (Directory dir = newDirectory()) {
int numDocs = 100000; int numDocs = 100000;

View File

@ -209,7 +209,7 @@ public class TestBKDRadixSelector extends LuceneTestCase {
} }
private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{ private void verify(Directory dir, PointWriter points, int dataDimensions, int indexDimensions, long start, long end, long middle, int packedLength, int bytesPerDimensions, int sortedOnHeap) throws IOException{
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDimensions, sortedOnHeap, dir, "test"); BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDimensions, sortedOnHeap, dir, "test");
//we only split by indexed dimension so we check for each only those dimension //we only split by indexed dimension so we check for each only those dimension
for (int splitDim = 0; splitDim < indexDimensions; splitDim++) { for (int splitDim = 0; splitDim < indexDimensions; splitDim++) {
//We need to make a copy of the data as it is deleted in the process //We need to make a copy of the data as it is deleted in the process
@ -225,9 +225,15 @@ public class TestBKDRadixSelector extends LuceneTestCase {
int cmp = FutureArrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions); int cmp = FutureArrays.compareUnsigned(max, 0, bytesPerDimensions, min, 0, bytesPerDimensions);
assertTrue(cmp <= 0); assertTrue(cmp <= 0);
if (cmp == 0) { if (cmp == 0) {
int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint); byte[] maxDataDim = getMaxDataDimension(slices[0], bytesPerDimensions, dataDimensions, indexDimensions, max, splitDim);
int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint); byte[] minDataDim = getMinDataDimension(slices[1], bytesPerDimensions, dataDimensions, indexDimensions, min, splitDim);
assertTrue(minDocId >= maxDocID); cmp = FutureArrays.compareUnsigned(maxDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions, minDataDim, 0, (dataDimensions - indexDimensions) * bytesPerDimensions);
assertTrue(cmp <= 0);
if (cmp == 0) {
int maxDocID = getMaxDocId(slices[0], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,maxDataDim);
int minDocId = getMinDocId(slices[1], bytesPerDimensions, splitDim, partitionPoint, dataDimensions, indexDimensions,minDataDim);
assertTrue(minDocId >= maxDocID);
}
} }
assertTrue(Arrays.equals(partitionPoint, min)); assertTrue(Arrays.equals(partitionPoint, min));
slices[0].writer.destroy(); slices[0].writer.destroy();
@ -293,14 +299,17 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return min; return min;
} }
private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException { private int getMinDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
int docID = Integer.MAX_VALUE; int docID = Integer.MAX_VALUE;
try (PointReader reader = p.writer.getReader(p.start, p.count)) { try (PointReader reader = p.writer.getReader(p.start, p.count)) {
while (reader.next()) { while (reader.next()) {
PointValue pointValue = reader.pointValue(); PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue(); BytesRef packedValue = pointValue.packedValue();
int offset = dimension * bytesPerDimension; int offset = dimension * bytesPerDimension;
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) { int dataOffset = indexDims * bytesPerDimension;
int dataLength = (dataDims - indexDims) * bytesPerDimension;
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
&& FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
int newDocID = pointValue.docID(); int newDocID = pointValue.docID();
if (newDocID < docID) { if (newDocID < docID) {
docID = newDocID; docID = newDocID;
@ -311,6 +320,26 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return docID; return docID;
} }
private byte[] getMinDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] minDim, int splitDim) throws IOException {
byte[] min = new byte[(dataDims - indexDims) * bytesPerDimension];
Arrays.fill(min, (byte) 0xff);
int offset = splitDim * bytesPerDimension;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
if (FutureArrays.mismatch(minDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
if (FutureArrays.compareUnsigned(min, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) > 0) {
System.arraycopy(value, 0, min, 0, (dataDims - indexDims) * bytesPerDimension);
}
}
}
}
return min;
}
private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException { private byte[] getMax(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension) throws IOException {
byte[] max = new byte[bytesPerDimension]; byte[] max = new byte[bytesPerDimension];
Arrays.fill(max, (byte) 0); Arrays.fill(max, (byte) 0);
@ -328,14 +357,37 @@ public class TestBKDRadixSelector extends LuceneTestCase {
return max; return max;
} }
private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint) throws IOException { private byte[] getMaxDataDimension(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dataDims, int indexDims, byte[] maxDim, int splitDim) throws IOException {
byte[] max = new byte[(dataDims - indexDims) * bytesPerDimension];
Arrays.fill(max, (byte) 0);
int offset = splitDim * bytesPerDimension;
try (PointReader reader = p.writer.getReader(p.start, p.count)) {
byte[] value = new byte[(dataDims - indexDims) * bytesPerDimension];
while (reader.next()) {
PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue();
if (FutureArrays.mismatch(maxDim, 0, bytesPerDimension, packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension) == -1) {
System.arraycopy(packedValue.bytes, packedValue.offset + indexDims * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension);
if (FutureArrays.compareUnsigned(max, 0, (dataDims - indexDims) * bytesPerDimension, value, 0, (dataDims - indexDims) * bytesPerDimension) < 0) {
System.arraycopy(value, 0, max, 0, (dataDims - indexDims) * bytesPerDimension);
}
}
}
}
return max;
}
private int getMaxDocId(BKDRadixSelector.PathSlice p, int bytesPerDimension, int dimension, byte[] partitionPoint, int dataDims, int indexDims, byte[] dataDim) throws IOException {
int docID = Integer.MIN_VALUE; int docID = Integer.MIN_VALUE;
try (PointReader reader = p.writer.getReader(p.start, p.count)) { try (PointReader reader = p.writer.getReader(p.start, p.count)) {
while (reader.next()) { while (reader.next()) {
PointValue pointValue = reader.pointValue(); PointValue pointValue = reader.pointValue();
BytesRef packedValue = pointValue.packedValue(); BytesRef packedValue = pointValue.packedValue();
int offset = dimension * bytesPerDimension; int offset = dimension * bytesPerDimension;
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0) { int dataOffset = indexDims * bytesPerDimension;
int dataLength = (dataDims - indexDims) * bytesPerDimension;
if (FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + offset, packedValue.offset + offset + bytesPerDimension, partitionPoint, 0, bytesPerDimension) == 0
&& FutureArrays.compareUnsigned(packedValue.bytes, packedValue.offset + dataOffset, packedValue.offset + dataOffset + dataLength, dataDim, 0, dataLength) == 0) {
int newDocID = pointValue.docID(); int newDocID = pointValue.docID();
if (newDocID > docID) { if (newDocID > docID) {
docID = newDocID; docID = newDocID;

View File

@ -117,7 +117,7 @@ public class TestBKDRadixSort extends LuceneTestCase {
private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{ private void verifySort(HeapPointWriter points, int dataDimensions, int indexDimensions, int start, int end, int bytesPerDim) throws IOException{
int packedBytesLength = dataDimensions * bytesPerDim; int packedBytesLength = dataDimensions * bytesPerDim;
Directory dir = newDirectory(); Directory dir = newDirectory();
BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, bytesPerDim, 1000, dir, "test"); BKDRadixSelector radixSelector = new BKDRadixSelector(dataDimensions, indexDimensions, bytesPerDim, 1000, dir, "test");
// we check for each dimension // we check for each dimension
for (int splitDim = 0; splitDim < dataDimensions; splitDim++) { for (int splitDim = 0; splitDim < dataDimensions; splitDim++) {
radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim)); radixSelector.heapRadixSort(points, start, end, splitDim, getRandomCommonPrefix(points, start, end, bytesPerDim, splitDim));
@ -130,6 +130,11 @@ public class TestBKDRadixSort extends LuceneTestCase {
BytesRef value = pointValue.packedValue(); BytesRef value = pointValue.packedValue();
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim); int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dimOffset, value.offset + dimOffset + bytesPerDim, previous, dimOffset, dimOffset + bytesPerDim);
assertTrue(cmp >= 0); assertTrue(cmp >= 0);
if (cmp == 0) {
int dataOffset = indexDimensions * bytesPerDim;
cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataOffset, value.offset + packedBytesLength, previous, dataOffset, packedBytesLength);
assertTrue(cmp >= 0);
}
if (cmp == 0) { if (cmp == 0) {
assertTrue(pointValue.docID() >= previousDocId); assertTrue(pointValue.docID() >= previousDocId);
} }

View File

@ -38,7 +38,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
private void doTestSort() { private void doTestSort() {
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(1, bytesPerDim, maxDoc); Point[] points = createRandomPoints(1, 1, bytesPerDim, maxDoc, new int[1]);
DummyPointsReader reader = new DummyPointsReader(points); DummyPointsReader reader = new DummyPointsReader(points);
MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length); MutablePointsReaderUtils.sort(maxDoc, bytesPerDim, reader, 0, points.length);
Arrays.sort(points, new Comparator<Point>() { Arrays.sort(points, new Comparator<Point>() {
@ -62,25 +62,15 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
} }
private void doTestSortByDim() { private void doTestSortByDim() {
final int numDims = TestUtil.nextInt(random(), 1, 8); final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc); int[] commonPrefixLengths = new int[numDataDims];
int[] commonPrefixLengths = new int[numDims]; Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
for (int i = 0; i < commonPrefixLengths.length; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = 0; dim < numDims; ++dim) {
int offset = dim * bytesPerDim;
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}
}
DummyPointsReader reader = new DummyPointsReader(points); DummyPointsReader reader = new DummyPointsReader(points);
final int sortedDim = random().nextInt(numDims); final int sortedDim = random().nextInt(numIndexDims);
MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length, MutablePointsReaderUtils.sortByDim(numDataDims, numIndexDims, sortedDim, bytesPerDim, commonPrefixLengths, reader, 0, points.length,
new BytesRef(), new BytesRef()); new BytesRef(), new BytesRef());
for (int i = 1; i < points.length; ++i) { for (int i = 1; i < points.length; ++i) {
final int offset = sortedDim * bytesPerDim; final int offset = sortedDim * bytesPerDim;
@ -88,7 +78,13 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
BytesRef currentValue = reader.points[i].packedValue; BytesRef currentValue = reader.points[i].packedValue;
int cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim); int cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + offset, previousValue.offset + offset + bytesPerDim, currentValue.bytes, currentValue.offset + offset, currentValue.offset + offset + bytesPerDim);
if (cmp == 0) { if (cmp == 0) {
cmp = reader.points[i - 1].doc - reader.points[i].doc; int dataDimOffset = numIndexDims * bytesPerDim;
int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
cmp = FutureArrays.compareUnsigned(previousValue.bytes, previousValue.offset + dataDimOffset, previousValue.offset + dataDimOffset + dataDimsLength,
currentValue.bytes, currentValue.offset + dataDimOffset, currentValue.offset + dataDimOffset + dataDimsLength);
if (cmp == 0) {
cmp = reader.points[i - 1].doc - reader.points[i].doc;
}
} }
assertTrue(cmp <= 0); assertTrue(cmp <= 0);
} }
@ -101,29 +97,31 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
} }
private void doTestPartition() { private void doTestPartition() {
final int numDims = TestUtil.nextInt(random(), 1, 8); final int numIndexDims = TestUtil.nextInt(random(), 1, 8);
final int numDataDims = TestUtil.nextInt(random(), numIndexDims, 8);
final int bytesPerDim = TestUtil.nextInt(random(), 1, 16); final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
int[] commonPrefixLengths = new int[numDataDims];
final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30)); final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
Point[] points = createRandomPoints(numDims, bytesPerDim, maxDoc); Point[] points = createRandomPoints(numDataDims, numIndexDims, bytesPerDim, maxDoc, commonPrefixLengths);
int commonPrefixLength = TestUtil.nextInt(random(), 0, bytesPerDim); final int splitDim = random().nextInt(numIndexDims);
final int splitDim = random().nextInt(numDims);
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
BytesRef packedValue = points[i].packedValue;
int offset = splitDim * bytesPerDim;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLength);
}
DummyPointsReader reader = new DummyPointsReader(points); DummyPointsReader reader = new DummyPointsReader(points);
final int pivot = TestUtil.nextInt(random(), 0, points.length - 1); final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLength, reader, 0, points.length, pivot, MutablePointsReaderUtils.partition(numDataDims, numIndexDims, maxDoc, splitDim, bytesPerDim, commonPrefixLengths[splitDim], reader, 0, points.length, pivot,
new BytesRef(), new BytesRef()); new BytesRef(), new BytesRef());
BytesRef pivotValue = reader.points[pivot].packedValue; BytesRef pivotValue = reader.points[pivot].packedValue;
int offset = splitDim * bytesPerDim; int offset = splitDim * bytesPerDim;
for (int i = 0; i < points.length; ++i) { for (int i = 0; i < points.length; ++i) {
BytesRef value = reader.points[i].packedValue; BytesRef value = reader.points[i].packedValue;
int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim, pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim); int cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + offset, value.offset + offset + bytesPerDim,
pivotValue.bytes, pivotValue.offset + offset, pivotValue.offset + offset + bytesPerDim);
if (cmp == 0) { if (cmp == 0) {
cmp = reader.points[i].doc - reader.points[pivot].doc; int dataDimOffset = numIndexDims * bytesPerDim;
int dataDimsLength = (numDataDims - numIndexDims) * bytesPerDim;
cmp = FutureArrays.compareUnsigned(value.bytes, value.offset + dataDimOffset, value.offset + dataDimOffset + dataDimsLength,
pivotValue.bytes, pivotValue.offset + dataDimOffset, pivotValue.offset + dataDimOffset + dataDimsLength);
if (cmp == 0) {
cmp = reader.points[i].doc - reader.points[pivot].doc;
}
} }
if (i < pivot) { if (i < pivot) {
assertTrue(cmp <= 0); assertTrue(cmp <= 0);
@ -135,14 +133,54 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
} }
} }
private static Point[] createRandomPoints(int numDims, int bytesPerDim, int maxDoc) { private static Point[] createRandomPoints(int numDataDims, int numIndexdims, int bytesPerDim, int maxDoc, int[] commonPrefixLengths) {
final int packedBytesLength = numDims * bytesPerDim; assertTrue(commonPrefixLengths.length == numDataDims);
final int packedBytesLength = numDataDims * bytesPerDim;
final int numPoints = TestUtil.nextInt(random(), 1, 100000); final int numPoints = TestUtil.nextInt(random(), 1, 100000);
Point[] points = new Point[numPoints]; Point[] points = new Point[numPoints];
for (int i = 0; i < numPoints; ++i) { if (random().nextInt(5) != 0) {
byte[] value = new byte[packedBytesLength]; for (int i = 0; i < numPoints; ++i) {
random().nextBytes(value); byte[] value = new byte[packedBytesLength];
points[i] = new Point(value, random().nextInt(maxDoc)); random().nextBytes(value);
points[i] = new Point(value, random().nextInt(maxDoc));
}
for (int i = 0; i < numDataDims; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = 0; dim < numDataDims; ++dim) {
int offset = dim * bytesPerDim;
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}
}
} else {
//index dim are equal, data dims different
byte[] indexDims = new byte[numIndexdims * bytesPerDim];
random().nextBytes(indexDims);
byte[] dataDims = new byte[(numDataDims - numIndexdims) * bytesPerDim];
for (int i = 0; i < numPoints; ++i) {
byte[] value = new byte[packedBytesLength];
System.arraycopy(indexDims, 0, value, 0, numIndexdims * bytesPerDim);
random().nextBytes(dataDims);
System.arraycopy(dataDims, 0, value, numIndexdims * bytesPerDim, (numDataDims - numIndexdims) * bytesPerDim);
points[i] = new Point(value, random().nextInt(maxDoc));
}
for (int i = 0; i < numIndexdims; ++i) {
commonPrefixLengths[i] = bytesPerDim;
}
for (int i = numDataDims; i < numDataDims; ++i) {
commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, bytesPerDim);
}
BytesRef firstValue = points[0].packedValue;
for (int i = 1; i < points.length; ++i) {
for (int dim = numIndexdims; dim < numDataDims; ++dim) {
int offset = dim * bytesPerDim;
BytesRef packedValue = points[i].packedValue;
System.arraycopy(firstValue.bytes, firstValue.offset + offset, packedValue.bytes, packedValue.offset + offset, commonPrefixLengths[dim]);
}
}
} }
return points; return points;
} }