From 5511bcea05d402f301e8964299f1bdb5ce05b7e4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 6 Oct 2021 19:16:19 +0200 Subject: [PATCH] LUCENE-10153: Speed up BKDWriter using VarHandles. (#357) --- lucene/CHANGES.txt | 3 + .../lucene/util/bkd/BKDRadixSelector.java | 48 ++----- .../org/apache/lucene/util/bkd/BKDUtil.java | 104 ++++++++++++++ .../org/apache/lucene/util/bkd/BKDWriter.java | 115 +++++---------- .../util/bkd/MutablePointsReaderUtils.java | 29 ++-- .../org/apache/lucene/util/TestArrayUtil.java | 48 ++++--- .../apache/lucene/util/bkd/TestBKDUtil.java | 136 ++++++++++++++++++ 7 files changed, 335 insertions(+), 148 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java create mode 100644 lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDUtil.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index c26b4975a53..046f2ad07f3 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -297,6 +297,9 @@ Improvements * LUCENE-10143: Delegate primitive writes in RateLimitedIndexOutput. (Uwe Schindler, Robert Muir, Adrien Grand) +* LUCENE-10145, LUCENE-10153: Faster flushes and merges of points by leveraging + VarHandles. (Adrien Grand) + Bug fixes * LUCENE-9686: Fix read past EOF handling in DirectIODirectory. 
(Zach Chen, diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java index 92cfec3bcac..662f21b6c86 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDRadixSelector.java @@ -19,6 +19,8 @@ package org.apache.lucene.util.bkd; import java.io.IOException; import java.util.Arrays; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntroSelector; import org.apache.lucene.util.IntroSorter; @@ -437,11 +439,12 @@ public final class BKDRadixSelector { @Override protected Selector getFallbackSelector(int d) { final int skypedBytes = d + commonPrefixLength; - final int dimStart = dim * config.bytesPerDim + skypedBytes; - final int dimEnd = dim * config.bytesPerDim + config.bytesPerDim; + final int dimStart = dim * config.bytesPerDim; // data length is composed by the data dimensions plus the docID final int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim + Integer.BYTES; + final ByteArrayComparator dimComparator = + ArrayUtil.getUnsignedComparator(config.bytesPerDim); return new IntroSelector() { @Override @@ -473,13 +476,8 @@ public final class BKDRadixSelector { int iOffset = i * config.bytesPerDoc; int jOffset = j * config.bytesPerDoc; int cmp = - Arrays.compareUnsigned( - points.block, - iOffset + dimStart, - iOffset + dimEnd, - points.block, - jOffset + dimStart, - jOffset + dimEnd); + dimComparator.compare( + points.block, iOffset + dimStart, points.block, jOffset + dimStart); if (cmp != 0) { return cmp; } @@ -499,14 +497,7 @@ public final class BKDRadixSelector { protected int comparePivot(int j) { if (skypedBytes < config.bytesPerDim) { int jOffset = j * config.bytesPerDoc; - int cmp = - 
Arrays.compareUnsigned( - scratch, - skypedBytes, - config.bytesPerDim, - points.block, - jOffset + dimStart, - jOffset + dimEnd); + int cmp = dimComparator.compare(scratch, 0, points.block, jOffset + dimStart); if (cmp != 0) { return cmp; } @@ -564,11 +555,12 @@ public final class BKDRadixSelector { @Override protected Sorter getFallbackSorter(int k) { final int skypedBytes = k + commonPrefixLength; - final int dimStart = dim * config.bytesPerDim + skypedBytes; - final int dimEnd = dim * config.bytesPerDim + config.bytesPerDim; + final int dimStart = dim * config.bytesPerDim; // data length is composed by the data dimensions plus the docID final int dataLength = (config.numDims - config.numIndexDims) * config.bytesPerDim + Integer.BYTES; + final ByteArrayComparator dimComparator = + ArrayUtil.getUnsignedComparator(config.bytesPerDim); return new IntroSorter() { @Override @@ -600,13 +592,8 @@ public final class BKDRadixSelector { int iOffset = i * config.bytesPerDoc; int jOffset = j * config.bytesPerDoc; int cmp = - Arrays.compareUnsigned( - points.block, - iOffset + dimStart, - iOffset + dimEnd, - points.block, - jOffset + dimStart, - jOffset + dimEnd); + dimComparator.compare( + points.block, iOffset + dimStart, points.block, jOffset + dimStart); if (cmp != 0) { return cmp; } @@ -626,14 +613,7 @@ public final class BKDRadixSelector { protected int comparePivot(int j) { if (skypedBytes < config.bytesPerDim) { int jOffset = j * config.bytesPerDoc; - int cmp = - Arrays.compareUnsigned( - scratch, - skypedBytes, - config.bytesPerDim, - points.block, - jOffset + dimStart, - jOffset + dimEnd); + int cmp = dimComparator.compare(scratch, 0, points.block, jOffset + dimStart); if (cmp != 0) { return cmp; } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java new file mode 100644 index 00000000000..692cd91283a --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDUtil.java 
@@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util.bkd; + +import java.util.Arrays; +import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; +import org.apache.lucene.util.BitUtil; + +/** Utility functions to build BKD trees. */ +final class BKDUtil { + + private BKDUtil() {} + + /** + * Return a comparator that computes the common prefix length across the next {@code numBytes} + * bytes of the provided arrays. + */ + public static ByteArrayComparator getPrefixLengthComparator(int numBytes) { + if (numBytes == Long.BYTES) { + // Used by LongPoint, DoublePoint + return BKDUtil::commonPrefixLength8; + } else if (numBytes == Integer.BYTES) { + // Used by IntPoint, FloatPoint, LatLonPoint, LatLonShape + return BKDUtil::commonPrefixLength4; + } else { + return (a, aOffset, b, bOffset) -> commonPrefixLengthN(a, aOffset, b, bOffset, numBytes); + } + } + + /** Return the length of the common prefix across the next 8 bytes of both provided arrays. 
*/ + public static int commonPrefixLength8(byte[] a, int aOffset, byte[] b, int bOffset) { + long aLong = (long) BitUtil.VH_LE_LONG.get(a, aOffset); + long bLong = (long) BitUtil.VH_LE_LONG.get(b, bOffset); + final int commonPrefixInBits = Long.numberOfLeadingZeros(Long.reverseBytes(aLong ^ bLong)); + return commonPrefixInBits >>> 3; + } + + /** Return the length of the common prefix across the next 4 bytes of both provided arrays. */ + public static int commonPrefixLength4(byte[] a, int aOffset, byte[] b, int bOffset) { + int aInt = (int) BitUtil.VH_LE_INT.get(a, aOffset); + int bInt = (int) BitUtil.VH_LE_INT.get(b, bOffset); + final int commonPrefixInBits = Integer.numberOfLeadingZeros(Integer.reverseBytes(aInt ^ bInt)); + return commonPrefixInBits >>> 3; + } + + static int commonPrefixLengthN(byte[] a, int aOffset, byte[] b, int bOffset, int numBytes) { + int cmp = Arrays.mismatch(a, aOffset, aOffset + numBytes, b, bOffset, bOffset + numBytes); + if (cmp == -1) { + return numBytes; + } else { + return cmp; + } + } + + /** Predicate for a fixed number of bytes. */ + @FunctionalInterface + public static interface ByteArrayPredicate { + + /** Test bytes starting from the given offsets. */ + boolean test(byte[] a, int aOffset, byte[] b, int bOffset); + } + + /** Return a predicate that tells whether the next {@code numBytes} bytes are equal. */ + public static ByteArrayPredicate getEqualsPredicate(int numBytes) { + if (numBytes == Long.BYTES) { + // Used by LongPoint, DoublePoint + return BKDUtil::equals8; + } else if (numBytes == Integer.BYTES) { + // Used by IntPoint, FloatPoint, LatLonPoint, LatLonShape + return BKDUtil::equals4; + } else { + return (a, aOffset, b, bOffset) -> + Arrays.equals(a, aOffset, aOffset + numBytes, b, bOffset, bOffset + numBytes); + } + } + + /** Check whether the next 8 bytes are exactly the same in the provided arrays. 
*/ + public static boolean equals8(byte[] a, int aOffset, byte[] b, int bOffset) { + long aLong = (long) BitUtil.VH_LE_LONG.get(a, aOffset); + long bLong = (long) BitUtil.VH_LE_LONG.get(b, bOffset); + return aLong == bLong; + } + + /** Check whether the next 4 bytes are exactly the same in the provided arrays. */ + public static boolean equals4(byte[] a, int aOffset, byte[] b, int bOffset) { + int aInt = (int) BitUtil.VH_LE_INT.get(a, aOffset); + int bInt = (int) BitUtil.VH_LE_INT.get(b, bOffset); + return aInt == bInt; + } +} diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 1b474c538b3..2ca964b44b2 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -43,6 +43,7 @@ import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.bkd.BKDUtil.ByteArrayPredicate; // TODO // - allow variable length byte[] (across docs and dims), but this is quite a bit more hairy @@ -94,6 +95,8 @@ public class BKDWriter implements Closeable { protected final BKDConfig config; private final ByteArrayComparator comparator; + private final ByteArrayPredicate equalsPredicate; + private final ByteArrayComparator commonPrefixComparator; final TrackingDirectoryWrapper tempDir; final String tempFileNamePrefix; @@ -146,6 +149,8 @@ public class BKDWriter implements Closeable { this.config = config; this.comparator = ArrayUtil.getUnsignedComparator(config.bytesPerDim); + this.equalsPredicate = BKDUtil.getEqualsPredicate(config.bytesPerDim); + this.commonPrefixComparator = BKDUtil.getPrefixLengthComparator(config.bytesPerDim); docsSeen = new FixedBitSet(maxDoc); @@ -668,14 +673,8 @@ public class BKDWriter implements Closeable { config, valueCount + leafCount, 0, 
lastPackedValue, packedValue, 0, docID, lastDocID); if (leafCount == 0 - || Arrays.mismatch( - leafValues, - (leafCount - 1) * config.bytesPerDim, - leafCount * config.bytesPerDim, - packedValue, - 0, - config.bytesPerDim) - != -1) { + || equalsPredicate.test(leafValues, (leafCount - 1) * config.bytesPerDim, packedValue, 0) + == false) { leafCardinality++; } System.arraycopy( @@ -780,15 +779,9 @@ public class BKDWriter implements Closeable { checkMaxLeafNodeCount(leafBlockFPs.size()); // Find per-dim common prefix: - int offset = (leafCount - 1) * config.packedBytesLength; - int prefix = - Arrays.mismatch( - leafValues, 0, config.bytesPerDim, leafValues, offset, offset + config.bytesPerDim); - if (prefix == -1) { - prefix = config.bytesPerDim; - } - - commonPrefixLengths[0] = prefix; + commonPrefixLengths[0] = + commonPrefixComparator.compare( + leafValues, 0, leafValues, (leafCount - 1) * config.packedBytesLength); writeLeafBlockDocs(dataOut, leafDocs, 0, leafCount); writeCommonPrefixes(dataOut, commonPrefixLengths, leafValues); @@ -1097,16 +1090,8 @@ public class BKDWriter implements Closeable { // find common prefix with last split value in this dim: int prefix = - Arrays.mismatch( - splitValue.bytes, - address, - address + config.bytesPerDim, - lastSplitValues, - splitDim * config.bytesPerDim, - splitDim * config.bytesPerDim + config.bytesPerDim); - if (prefix == -1) { - prefix = config.bytesPerDim; - } + commonPrefixComparator.compare( + splitValue.bytes, address, lastSplitValues, splitDim * config.bytesPerDim); // System.out.println("writeNodeData nodeID=" + nodeID + " splitDim=" + splitDim + " numDims=" // + numDims + " config.bytesPerDim=" + config.bytesPerDim + " prefix=" + prefix); @@ -1327,11 +1312,8 @@ public class BKDWriter implements Closeable { for (int i = 1; i < count; i++) { value = packedValues.apply(i); for (int dim = 0; dim < config.numDims; dim++) { - final int start = dim * config.bytesPerDim + commonPrefixLengths[dim]; - final int end = dim 
* config.bytesPerDim + config.bytesPerDim; - if (Arrays.mismatch( - value.bytes, value.offset + start, value.offset + end, scratch1, start, end) - != -1) { + final int start = dim * config.bytesPerDim; + if (equalsPredicate.test(value.bytes, value.offset + start, scratch1, start) == false) { out.writeVInt(cardinality); for (int j = 0; j < config.numDims; j++) { out.writeBytes( @@ -1594,16 +1576,13 @@ public class BKDWriter implements Closeable { final int offset = dim * config.bytesPerDim; int dimensionPrefixLength = commonPrefixLengths[dim]; commonPrefixLengths[dim] = - Arrays.mismatch( - scratchBytesRef1.bytes, - scratchBytesRef1.offset + offset, - scratchBytesRef1.offset + offset + dimensionPrefixLength, - scratchBytesRef2.bytes, - scratchBytesRef2.offset + offset, - scratchBytesRef2.offset + offset + dimensionPrefixLength); - if (commonPrefixLengths[dim] == -1) { - commonPrefixLengths[dim] = dimensionPrefixLength; - } + Math.min( + dimensionPrefixLength, + commonPrefixComparator.compare( + scratchBytesRef1.bytes, + scratchBytesRef1.offset + offset, + scratchBytesRef2.bytes, + scratchBytesRef2.offset + offset)); } } @@ -1652,16 +1631,13 @@ public class BKDWriter implements Closeable { for (int i = from + 1; i < to; ++i) { reader.getValue(i, collector); for (int dim = 0; dim < config.numDims; dim++) { - final int start = dim * config.bytesPerDim + commonPrefixLengths[dim]; - final int end = dim * config.bytesPerDim + config.bytesPerDim; - if (Arrays.mismatch( + final int start = dim * config.bytesPerDim; + if (equalsPredicate.test( collector.bytes, collector.offset + start, - collector.offset + end, comparator.bytes, - comparator.offset + start, - comparator.offset + end) - != -1) { + comparator.offset + start) + == false) { leafCardinality++; BytesRef scratch = collector; collector = comparator; @@ -1727,17 +1703,12 @@ public class BKDWriter implements Closeable { // How many points will be in the left tree: final int mid = from + numLeftLeafNodes * 
config.maxPointsInLeafNode; - int commonPrefixLen = - Arrays.mismatch( + final int commonPrefixLen = + commonPrefixComparator.compare( minPackedValue, splitDim * config.bytesPerDim, - splitDim * config.bytesPerDim + config.bytesPerDim, maxPackedValue, - splitDim * config.bytesPerDim, - splitDim * config.bytesPerDim + config.bytesPerDim); - if (commonPrefixLen == -1) { - commonPrefixLen = config.bytesPerDim; - } + splitDim * config.bytesPerDim); MutablePointsReaderUtils.partition( config, @@ -1998,17 +1969,12 @@ public class BKDWriter implements Closeable { BKDRadixSelector.PathSlice[] slices = new BKDRadixSelector.PathSlice[2]; - int commonPrefixLen = - Arrays.mismatch( + final int commonPrefixLen = + commonPrefixComparator.compare( minPackedValue, splitDim * config.bytesPerDim, - splitDim * config.bytesPerDim + config.bytesPerDim, maxPackedValue, - splitDim * config.bytesPerDim, - splitDim * config.bytesPerDim + config.bytesPerDim); - if (commonPrefixLen == -1) { - commonPrefixLen = config.bytesPerDim; - } + splitDim * config.bytesPerDim); byte[] splitValue = radixSelector.select( @@ -2091,17 +2057,14 @@ public class BKDWriter implements Closeable { packedValue = value.packedValue(); for (int dim = 0; dim < config.numDims; dim++) { if (commonPrefixLengths[dim] != 0) { - int j = - Arrays.mismatch( - commonPrefix, - dim * config.bytesPerDim, - dim * config.bytesPerDim + commonPrefixLengths[dim], - packedValue.bytes, - packedValue.offset + dim * config.bytesPerDim, - packedValue.offset + dim * config.bytesPerDim + commonPrefixLengths[dim]); - if (j != -1) { - commonPrefixLengths[dim] = j; - } + commonPrefixLengths[dim] = + Math.min( + commonPrefixLengths[dim], + commonPrefixComparator.compare( + commonPrefix, + dim * config.bytesPerDim, + packedValue.bytes, + packedValue.offset + dim * config.bytesPerDim)); } } } diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java 
b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java index edfe3eac18b..24c8403e2c2 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java @@ -18,6 +18,8 @@ package org.apache.lucene.util.bkd; import java.util.Arrays; import org.apache.lucene.codecs.MutablePointValues; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntroSelector; import org.apache.lucene.util.IntroSorter; @@ -94,8 +96,8 @@ public final class MutablePointsReaderUtils { BytesRef scratch1, BytesRef scratch2) { - final int start = sortedDim * config.bytesPerDim + commonPrefixLengths[sortedDim]; - final int dimEnd = sortedDim * config.bytesPerDim + config.bytesPerDim; + final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(config.bytesPerDim); + final int start = sortedDim * config.bytesPerDim; // No need for a fancy radix sort here, this is called on the leaves only so // there are not many values to sort new IntroSorter() { @@ -118,13 +120,8 @@ public final class MutablePointsReaderUtils { protected int comparePivot(int j) { reader.getValue(j, scratch2); int cmp = - Arrays.compareUnsigned( - pivot.bytes, - pivot.offset + start, - pivot.offset + dimEnd, - scratch2.bytes, - scratch2.offset + start, - scratch2.offset + dimEnd); + comparator.compare( + pivot.bytes, pivot.offset + start, scratch2.bytes, scratch2.offset + start); if (cmp == 0) { cmp = Arrays.compareUnsigned( @@ -167,11 +164,14 @@ public final class MutablePointsReaderUtils { @Override protected Selector getFallbackSelector(int k) { + final int dimStart = splitDim * config.bytesPerDim; final int dataStart = (k < dimCmpBytes) ? 
config.packedIndexBytesLength : config.packedIndexBytesLength + k - dimCmpBytes; final int dataEnd = config.numDims * config.bytesPerDim; + final ByteArrayComparator dimComparator = + ArrayUtil.getUnsignedComparator(config.bytesPerDim); return new IntroSelector() { final BytesRef pivot = scratch1; @@ -193,13 +193,10 @@ public final class MutablePointsReaderUtils { if (k < dimCmpBytes) { reader.getValue(j, scratch2); int cmp = - Arrays.compareUnsigned( - pivot.bytes, - pivot.offset + dimOffset + k, - pivot.offset + dimOffset + dimCmpBytes, - scratch2.bytes, - scratch2.offset + dimOffset + k, - scratch2.offset + dimOffset + dimCmpBytes); + dimComparator.compare( + pivot.bytes, pivot.offset + dimStart, + scratch2.bytes, scratch2.offset + dimStart); + if (cmp != 0) { return cmp; } diff --git a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java index d70a6410ec0..bc2c3556b09 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java @@ -433,48 +433,52 @@ public class TestArrayUtil extends LuceneTestCase { } public void testCompareUnsigned4() { - int aI = TestUtil.nextInt(random(), 0, 3); - byte[] a = new byte[Integer.BYTES + aI]; - int bI = TestUtil.nextInt(random(), 0, 3); - byte[] b = new byte[Integer.BYTES + bI]; + int aOffset = TestUtil.nextInt(random(), 0, 3); + byte[] a = new byte[Integer.BYTES + aOffset]; + int bOffset = TestUtil.nextInt(random(), 0, 3); + byte[] b = new byte[Integer.BYTES + bOffset]; for (int i = 0; i < Integer.BYTES; ++i) { - a[aI + i] = (byte) random().nextInt(1 << 8); + a[aOffset + i] = (byte) random().nextInt(1 << 8); do { - b[bI + i] = (byte) random().nextInt(1 << 8); - } while (b[bI + i] == a[aI + i]); + b[bOffset + i] = (byte) random().nextInt(1 << 8); + } while (b[bOffset + i] == a[aOffset + i]); } for (int i = 0; i < Integer.BYTES; ++i) { - int expected = 
Arrays.compareUnsigned(a, aI, aI + Integer.BYTES, b, bI, bI + Integer.BYTES); - int actual = ArrayUtil.compareUnsigned4(a, aI, b, bI); + int expected = + Arrays.compareUnsigned( + a, aOffset, aOffset + Integer.BYTES, b, bOffset, bOffset + Integer.BYTES); + int actual = ArrayUtil.compareUnsigned4(a, aOffset, b, bOffset); assertEquals(Integer.signum(expected), Integer.signum(actual)); - b[bI + i] = a[aI + i]; + b[bOffset + i] = a[aOffset + i]; } - assertEquals(0, ArrayUtil.compareUnsigned4(a, aI, b, bI)); + assertEquals(0, ArrayUtil.compareUnsigned4(a, aOffset, b, bOffset)); } public void testCompareUnsigned8() { - int aI = TestUtil.nextInt(random(), 0, 7); - byte[] a = new byte[Long.BYTES + aI]; - int bI = TestUtil.nextInt(random(), 0, 3); - byte[] b = new byte[Long.BYTES + bI]; + int aOffset = TestUtil.nextInt(random(), 0, 7); + byte[] a = new byte[Long.BYTES + aOffset]; + int bOffset = TestUtil.nextInt(random(), 0, 7); + byte[] b = new byte[Long.BYTES + bOffset]; for (int i = 0; i < Long.BYTES; ++i) { - a[aI + i] = (byte) random().nextInt(1 << 8); + a[aOffset + i] = (byte) random().nextInt(1 << 8); do { - b[bI + i] = (byte) random().nextInt(1 << 8); - } while (b[bI + i] == a[aI + i]); + b[bOffset + i] = (byte) random().nextInt(1 << 8); + } while (b[bOffset + i] == a[aOffset + i]); } for (int i = 0; i < Long.BYTES; ++i) { - int expected = Arrays.compareUnsigned(a, aI, aI + Long.BYTES, b, bI, bI + Long.BYTES); - int actual = ArrayUtil.compareUnsigned8(a, aI, b, bI); + int expected = + Arrays.compareUnsigned( + a, aOffset, aOffset + Long.BYTES, b, bOffset, bOffset + Long.BYTES); + int actual = ArrayUtil.compareUnsigned8(a, aOffset, b, bOffset); assertEquals(Integer.signum(expected), Integer.signum(actual)); - b[bI + i] = a[aI + i]; + b[bOffset + i] = a[aOffset + i]; } - assertEquals(0, ArrayUtil.compareUnsigned8(a, aI, b, bI)); + assertEquals(0, ArrayUtil.compareUnsigned8(a, aOffset, b, bOffset)); } } diff --git 
a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDUtil.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDUtil.java new file mode 100644 index 00000000000..e264fc691f7 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKDUtil.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.util.bkd; + +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestBKDUtil extends LuceneTestCase { + + public void testEquals4() { + int aOffset = TestUtil.nextInt(random(), 0, 3); + byte[] a = new byte[Integer.BYTES + aOffset]; + int bOffset = TestUtil.nextInt(random(), 0, 3); + byte[] b = new byte[Integer.BYTES + bOffset]; + + for (int i = 0; i < Integer.BYTES; ++i) { + a[aOffset + i] = (byte) random().nextInt(1 << 8); + } + System.arraycopy(a, aOffset, b, bOffset, 4); + + assertTrue(BKDUtil.equals4(a, aOffset, b, bOffset)); + + for (int i = 0; i < Integer.BYTES; ++i) { + do { + b[bOffset + i] = (byte) random().nextInt(1 << 8); + } while (b[bOffset + i] == a[aOffset + i]); + + assertFalse(BKDUtil.equals4(a, aOffset, b, bOffset)); + + b[bOffset + i] = a[aOffset + i]; + } + } + + public void testEquals8() { + int aOffset = TestUtil.nextInt(random(), 0, 7); + byte[] a = new byte[Long.BYTES + aOffset]; + int bOffset = TestUtil.nextInt(random(), 0, 7); + byte[] b = new byte[Long.BYTES + bOffset]; + + for (int i = 0; i < Long.BYTES; ++i) { + a[aOffset + i] = (byte) random().nextInt(1 << 8); + } + System.arraycopy(a, aOffset, b, bOffset, 8); + + assertTrue(BKDUtil.equals8(a, aOffset, b, bOffset)); + + for (int i = 0; i < Long.BYTES; ++i) { + do { + b[bOffset + i] = (byte) random().nextInt(1 << 8); + } while (b[bOffset + i] == a[aOffset + i]); + + assertFalse(BKDUtil.equals8(a, aOffset, b, bOffset)); + + b[bOffset + i] = a[aOffset + i]; + } + } + + public void testCommonPrefixLength4() { + int aOffset = TestUtil.nextInt(random(), 0, 3); + byte[] a = new byte[Integer.BYTES + aOffset]; + int bOffset = TestUtil.nextInt(random(), 0, 3); + byte[] b = new byte[Integer.BYTES + bOffset]; + + for (int i = 0; i < Integer.BYTES; ++i) { + a[aOffset + i] = (byte) random().nextInt(1 << 8); + do { + b[bOffset + i] = (byte) random().nextInt(1 << 8); + } while (b[bOffset + i] == a[aOffset + i]); + } + + 
for (int i = 0; i < Integer.BYTES; ++i) { + assertEquals(i, BKDUtil.commonPrefixLength4(a, aOffset, b, bOffset)); + b[bOffset + i] = a[aOffset + i]; + } + + assertEquals(4, BKDUtil.commonPrefixLength4(a, aOffset, b, bOffset)); + } + + public void testCommonPrefixLength8() { + int aOffset = TestUtil.nextInt(random(), 0, 7); + byte[] a = new byte[Long.BYTES + aOffset]; + int bOffset = TestUtil.nextInt(random(), 0, 7); + byte[] b = new byte[Long.BYTES + bOffset]; + + for (int i = 0; i < Long.BYTES; ++i) { + a[aOffset + i] = (byte) random().nextInt(1 << 8); + do { + b[bOffset + i] = (byte) random().nextInt(1 << 8); + } while (b[bOffset + i] == a[aOffset + i]); + } + + for (int i = 0; i < Long.BYTES; ++i) { + assertEquals(i, BKDUtil.commonPrefixLength8(a, aOffset, b, bOffset)); + b[bOffset + i] = a[aOffset + i]; + } + + assertEquals(8, BKDUtil.commonPrefixLength8(a, aOffset, b, bOffset)); + } + + public void testCommonPrefixLengthN() { + final int numBytes = TestUtil.nextInt(random(), 2, 16); + + int aOffset = TestUtil.nextInt(random(), 0, numBytes - 1); + byte[] a = new byte[numBytes + aOffset]; + int bOffset = TestUtil.nextInt(random(), 0, numBytes - 1); + byte[] b = new byte[numBytes + bOffset]; + + for (int i = 0; i < numBytes; ++i) { + a[aOffset + i] = (byte) random().nextInt(1 << 8); + do { + b[bOffset + i] = (byte) random().nextInt(1 << 8); + } while (b[bOffset + i] == a[aOffset + i]); + } + + for (int i = 0; i < numBytes; ++i) { + assertEquals(i, BKDUtil.commonPrefixLengthN(a, aOffset, b, bOffset, numBytes)); + b[bOffset + i] = a[aOffset + i]; + } + + assertEquals(numBytes, BKDUtil.commonPrefixLengthN(a, aOffset, b, bOffset, numBytes)); + } +}