HBASE-17877 Improve HBase's byte[] comparator.

Signed-off-by: Lars Hofhansl <larsh@apache.org>
This commit is contained in:
Vikas Vishwakarma 2017-04-27 13:21:07 -07:00 committed by Lars Hofhansl
parent 880db3eee4
commit b81e00f5ea
3 changed files with 56 additions and 62 deletions

View File

@ -38,8 +38,10 @@ Copyright Jan Kovařík
Licensed under the Apache License v2.0 as a part of the Bootstrap project. Licensed under the Apache License v2.0 as a part of the Bootstrap project.
-- --
This product includes portions of the Guava project v14, specifically This product includes portions of the Guava project v14 and v21, specifically
'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java' 'hbase-common/src/main/java/org/apache/hadoop/hbase/io/LimitInputStream.java'
'hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java'
'hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java'
Copyright (C) 2007 The Guava Authors Copyright (C) 2007 The Guava Authors

View File

@ -701,46 +701,43 @@ public final class ByteBufferUtils {
} }
static int compareToUnsafe(Object obj1, long o1, int l1, Object obj2, long o2, int l2) { static int compareToUnsafe(Object obj1, long o1, int l1, Object obj2, long o2, int l2) {
final int stride = 8;
final int minLength = Math.min(l1, l2); final int minLength = Math.min(l1, l2);
final int minWords = minLength / Bytes.SIZEOF_LONG; int strideLimit = minLength & ~(stride - 1);
int i;
/* /*
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower than * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower than
* comparing 4 bytes at a time even on 32-bit. On the other hand, it is substantially faster on * comparing 4 bytes at a time even on 32-bit. On the other hand, it is substantially faster on
* 64-bit. * 64-bit.
*/ */
int j = minWords << 3; // Same as minWords * SIZEOF_LONG for (i = 0; i < strideLimit; i += stride) {
for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) { long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + (long) i);
long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + i); long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + (long) i);
long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + i); if (lw != rw) {
long diff = lw ^ rw; if (!UnsafeAccess.littleEndian) {
if (diff != 0) { return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1;
return lessThanUnsignedLong(lw, rw) ? -1 : 1; }
}
}
int offset = j;
if (minLength - offset >= Bytes.SIZEOF_INT) { /*
int il = UnsafeAccess.theUnsafe.getInt(obj1, o1 + offset); * We want to compare only the first index where left[index] != right[index]. This
int ir = UnsafeAccess.theUnsafe.getInt(obj2, o2 + offset); * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are
* little-endian. Long.numberOfTrailingZeros(lw ^ rw) tells us the least significant
* nonzero bit, and clearing the low three bits of that count (& ~0x7) gives us the shift to get
* that least significant nonzero byte. This comparison logic is based on UnsignedBytes
* from guava v21
*/
int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
}
}
// The epilogue to cover the last (minLength % stride) elements.
for (; i < minLength; i++) {
int il = (UnsafeAccess.theUnsafe.getByte(obj1, o1 + i) & 0xFF);
int ir = (UnsafeAccess.theUnsafe.getByte(obj2, o2 + i) & 0xFF);
if (il != ir) { if (il != ir) {
return lessThanUnsignedInt(il, ir) ? -1 : 1; return il - ir;
}
offset += Bytes.SIZEOF_INT;
}
if (minLength - offset >= Bytes.SIZEOF_SHORT) {
short sl = UnsafeAccess.theUnsafe.getShort(obj1, o1 + offset);
short sr = UnsafeAccess.theUnsafe.getShort(obj2, o2 + offset);
if (sl != sr) {
return lessThanUnsignedShort(sl, sr) ? -1 : 1;
}
offset += Bytes.SIZEOF_SHORT;
}
if (minLength - offset == 1) {
int a = (UnsafeAccess.theUnsafe.getByte(obj1, o1 + offset) & 0xff);
int b = (UnsafeAccess.theUnsafe.getByte(obj2, o2 + offset) & 0xff);
if (a != b) {
return a - b;
} }
} }
return l1 - l2; return l1 - l2;

View File

@ -1575,47 +1575,42 @@ public class Bytes implements Comparable<Bytes> {
length1 == length2) { length1 == length2) {
return 0; return 0;
} }
final int stride = 8;
final int minLength = Math.min(length1, length2); final int minLength = Math.min(length1, length2);
final int minWords = minLength / SIZEOF_LONG; int strideLimit = minLength & ~(stride - 1);
final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET; final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
int i;
/* /*
* Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower
* time is no slower than comparing 4 bytes at a time even on 32-bit. * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit.
* On the other hand, it is substantially faster on 64-bit.
*/ */
// This is the end offset of long parts. for (i = 0; i < strideLimit; i += stride) {
int j = minWords << 3; // Same as minWords * SIZEOF_LONG
for (int i = 0; i < j; i += SIZEOF_LONG) {
long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i); long lw = theUnsafe.getLong(buffer1, offset1Adj + (long) i);
long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i); long rw = theUnsafe.getLong(buffer2, offset2Adj + (long) i);
long diff = lw ^ rw; if (lw != rw) {
if (diff != 0) { if(!UnsafeAccess.littleEndian) {
return lessThanUnsignedLong(lw, rw) ? -1 : 1; return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1;
} }
}
int offset = j;
if (minLength - offset >= SIZEOF_INT) { /*
int il = theUnsafe.getInt(buffer1, offset1Adj + offset); * We want to compare only the first index where left[index] != right[index]. This
int ir = theUnsafe.getInt(buffer2, offset2Adj + offset); * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are
if (il != ir) { * little-endian. Long.numberOfTrailingZeros(lw ^ rw) tells us the least significant
return lessThanUnsignedInt(il, ir) ? -1: 1; * nonzero bit, and clearing the low three bits of that count (& ~0x7) gives us the shift to get
* that least significant nonzero byte. This comparison logic is based on UnsignedBytes
* comparator from guava v21
*/
int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
} }
offset += SIZEOF_INT;
} }
if (minLength - offset >= SIZEOF_SHORT) {
short sl = theUnsafe.getShort(buffer1, offset1Adj + offset); // The epilogue to cover the last (minLength % stride) elements.
short sr = theUnsafe.getShort(buffer2, offset2Adj + offset); for (; i < minLength; i++) {
if (sl != sr) { int a = (buffer1[offset1 + i] & 0xFF);
return lessThanUnsignedShort(sl, sr) ? -1: 1; int b = (buffer2[offset2 + i] & 0xFF);
}
offset += SIZEOF_SHORT;
}
if (minLength - offset == 1) {
int a = (buffer1[(int)(offset1 + offset)] & 0xff);
int b = (buffer2[(int)(offset2 + offset)] & 0xff);
if (a != b) { if (a != b) {
return a - b; return a - b;
} }