LUCENE-8573: Use FutureArrays#mismatch in BKDWriter

Closes #510

Signed-off-by: Adrien Grand <jpountz@gmail.com>
This commit is contained in:
Christoph Büscher 2018-11-27 17:41:58 +01:00 committed by Adrien Grand
parent 2715beb6df
commit ecce083c50
2 changed files with 32 additions and 33 deletions

View File

@@ -233,6 +233,11 @@ Optimizations
* LUCENE-8552: FieldInfos.getMergedFieldInfos no longer does any merging if there is <= 1 segment. * LUCENE-8552: FieldInfos.getMergedFieldInfos no longer does any merging if there is <= 1 segment.
(Christophe Bismuth via David Smiley) (Christophe Bismuth via David Smiley)
Other
* LUCENE-8573: BKDWriter now uses FutureArrays#mismatch to compute shared prefixes.
(Christoph Büscher via Adrien Grand)
======================= Lucene 7.6.0 ======================= ======================= Lucene 7.6.0 =======================
Build Build

View File

@@ -678,13 +678,10 @@ public class BKDWriter implements Closeable {
checkMaxLeafNodeCount(leafBlockFPs.size()); checkMaxLeafNodeCount(leafBlockFPs.size());
// Find per-dim common prefix: // Find per-dim common prefix:
int prefix = bytesPerDim;
int offset = (leafCount - 1) * packedBytesLength; int offset = (leafCount - 1) * packedBytesLength;
for(int j=0;j<bytesPerDim;j++) { int prefix = FutureArrays.mismatch(leafValues, 0, bytesPerDim, leafValues, offset, offset + bytesPerDim);
if (leafValues[j] != leafValues[offset+j]) { if (prefix == -1) {
prefix = j; prefix = bytesPerDim;
break;
}
} }
commonPrefixLengths[0] = prefix; commonPrefixLengths[0] = prefix;
@@ -1156,11 +1153,10 @@ public class BKDWriter implements Closeable {
//System.out.println("recursePack inner nodeID=" + nodeID + " splitDim=" + splitDim + " splitValue=" + new BytesRef(splitPackedValues, address, bytesPerDim)); //System.out.println("recursePack inner nodeID=" + nodeID + " splitDim=" + splitDim + " splitValue=" + new BytesRef(splitPackedValues, address, bytesPerDim));
// find common prefix with last split value in this dim: // find common prefix with last split value in this dim:
int prefix = 0; int prefix = FutureArrays.mismatch(splitPackedValues, address, address + bytesPerDim, lastSplitValues,
for(;prefix<bytesPerDim;prefix++) { splitDim * bytesPerDim, splitDim * bytesPerDim + bytesPerDim);
if (splitPackedValues[address+prefix] != lastSplitValues[splitDim * bytesPerDim + prefix]) { if (prefix == -1) {
break; prefix = bytesPerDim;
}
} }
//System.out.println("writeNodeData nodeID=" + nodeID + " splitDim=" + splitDim + " numDims=" + numDims + " bytesPerDim=" + bytesPerDim + " prefix=" + prefix); //System.out.println("writeNodeData nodeID=" + nodeID + " splitDim=" + splitDim + " numDims=" + numDims + " bytesPerDim=" + bytesPerDim + " prefix=" + prefix);
@@ -1552,11 +1548,13 @@ public class BKDWriter implements Closeable {
reader.getValue(i, scratchBytesRef2); reader.getValue(i, scratchBytesRef2);
for (int dim=0;dim<numDataDims;dim++) { for (int dim=0;dim<numDataDims;dim++) {
final int offset = dim * bytesPerDim; final int offset = dim * bytesPerDim;
for(int j=0;j<commonPrefixLengths[dim];j++) { int dimensionPrefixLength = commonPrefixLengths[dim];
if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) { commonPrefixLengths[dim] = FutureArrays.mismatch(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset,
commonPrefixLengths[dim] = j; scratchBytesRef1.offset + offset + dimensionPrefixLength,
break; scratchBytesRef2.bytes, scratchBytesRef2.offset + offset,
} scratchBytesRef2.offset + offset + dimensionPrefixLength);
if (commonPrefixLengths[dim] == -1) {
commonPrefixLengths[dim] = dimensionPrefixLength;
} }
} }
} }
@@ -1632,12 +1630,11 @@ public class BKDWriter implements Closeable {
final int splitDim = split(minPackedValue, maxPackedValue, parentSplits); final int splitDim = split(minPackedValue, maxPackedValue, parentSplits);
final int mid = (from + to + 1) >>> 1; final int mid = (from + to + 1) >>> 1;
int commonPrefixLen = bytesPerDim; int commonPrefixLen = FutureArrays.mismatch(minPackedValue, splitDim * bytesPerDim,
for (int i = 0; i < bytesPerDim; ++i) { splitDim * bytesPerDim + bytesPerDim, maxPackedValue, splitDim * bytesPerDim,
if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) { splitDim * bytesPerDim + bytesPerDim);
commonPrefixLen = i; if (commonPrefixLen == -1) {
break; commonPrefixLen = bytesPerDim;
}
} }
MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen, MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
@@ -1713,12 +1710,9 @@ public class BKDWriter implements Closeable {
heapSource.readPackedValue(Math.toIntExact(source.start + source.count - 1), scratch2); heapSource.readPackedValue(Math.toIntExact(source.start + source.count - 1), scratch2);
int offset = dim * bytesPerDim; int offset = dim * bytesPerDim;
commonPrefixLengths[dim] = FutureArrays.mismatch(scratch1, offset, offset + bytesPerDim, scratch2, offset, offset + bytesPerDim);
if (commonPrefixLengths[dim] == -1) {
commonPrefixLengths[dim] = bytesPerDim; commonPrefixLengths[dim] = bytesPerDim;
for(int j=0;j<bytesPerDim;j++) {
if (scratch1[offset+j] != scratch2[offset+j]) {
commonPrefixLengths[dim] = j;
break;
}
} }
int prefix = commonPrefixLengths[dim]; int prefix = commonPrefixLengths[dim];
@@ -1754,11 +1748,11 @@ public class BKDWriter implements Closeable {
heapSource.readPackedValue(i, scratch2); heapSource.readPackedValue(i, scratch2);
for (int dim = numIndexDims; dim < numDataDims; dim++) { for (int dim = numIndexDims; dim < numDataDims; dim++) {
final int offset = dim * bytesPerDim; final int offset = dim * bytesPerDim;
for (int j = 0; j < commonPrefixLengths[dim]; j++) { int dimensionPrefixLength = commonPrefixLengths[dim];
if (scratch1[offset + j] != scratch2[offset + j]) { commonPrefixLengths[dim] = FutureArrays.mismatch(scratch1, offset, offset + dimensionPrefixLength,
commonPrefixLengths[dim] = j; scratch2, offset, offset + dimensionPrefixLength);
break; if (commonPrefixLengths[dim] == -1) {
} commonPrefixLengths[dim] = dimensionPrefixLength;
} }
} }
} }