From 3eee7302847b55291cc87820d96e603f5e4d7612 Mon Sep 17 00:00:00 2001 From: anoopsjohn Date: Mon, 29 Jun 2015 22:50:33 +0530 Subject: [PATCH] HBASE-12345 Unsafe based ByteBuffer Comparator. --- .../hadoop/hbase/util/ByteBufferUtils.java | 95 +++++++++++++++++-- .../hadoop/hbase/util/UnsafeAccess.java | 93 +++++++++++------- 2 files changed, 148 insertions(+), 40 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java index 66366797e95..33e5cc6e265 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java @@ -492,13 +492,12 @@ public final class ByteBufferUtils { return output; } - public static int compareTo(ByteBuffer buf1, int o1, int len1, ByteBuffer buf2, int o2, int len2) { - if (buf1.hasArray() && buf2.hasArray()) { - return Bytes.compareTo(buf1.array(), buf1.arrayOffset() + o1, len1, buf2.array(), - buf2.arrayOffset() + o2, len2); + public static int compareTo(ByteBuffer buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) { + if (UnsafeAccess.isAvailable()) { + return compareToUnsafe(buf1, o1, l1, buf2, o2, l2); } - int end1 = o1 + len1; - int end2 = o2 + len2; + int end1 = o1 + l1; + int end2 = o2 + l2; for (int i = o1, j = o2; i < end1 && j < end2; i++, j++) { int a = buf1.get(i) & 0xFF; int b = buf2.get(j) & 0xFF; @@ -506,7 +505,89 @@ public final class ByteBufferUtils { return a - b; } } - return len1 - len2; + return l1 - l2; + } + + static int compareToUnsafe(ByteBuffer buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) { + final int minLength = Math.min(l1, l2); + final int minWords = minLength / Bytes.SIZEOF_LONG; + + /* + * Compare 8 bytes at a time. Benchmarking shows comparing 8 bytes at a time is no slower than + * comparing 4 bytes at a time even on 32-bit. 
On the other hand, it is substantially faster on + * 64-bit. + */ + int j = minWords << 3; // Same as minWords * SIZEOF_LONG + for (int i = 0; i < j; i += Bytes.SIZEOF_LONG) { + long lw = UnsafeAccess.getAsLong(buf1, o1 + i); + long rw = UnsafeAccess.getAsLong(buf2, o2 + i); + long diff = lw ^ rw; + if (diff != 0) { + return lessThanUnsignedLong(lw, rw) ? -1 : 1; + } + } + int offset = j; + + if (minLength - offset >= Bytes.SIZEOF_INT) { + int il = UnsafeAccess.getAsInt(buf1, o1 + offset); + int ir = UnsafeAccess.getAsInt(buf2, o2 + offset); + if (il != ir) { + return lessThanUnsignedInt(il, ir) ? -1 : 1; + } + offset += Bytes.SIZEOF_INT; + } + if (minLength - offset >= Bytes.SIZEOF_SHORT) { + short sl = UnsafeAccess.getAsShort(buf1, o1 + offset); + short sr = UnsafeAccess.getAsShort(buf2, o2 + offset); + if (sl != sr) { + return lessThanUnsignedShort(sl, sr) ? -1 : 1; + } + offset += Bytes.SIZEOF_SHORT; + } + if (minLength - offset == 1) { + int a = (buf1.get(o1 + offset) & 0xff); + int b = (buf2.get(o2 + offset) & 0xff); + if (a != b) { + return a - b; + } + } + return l1 - l2; + } + + /* + * Both values are passed exactly as read by Unsafe. On a little-endian platform they must be + * byte-reversed to big-endian before comparing. We do all writes in big-endian format. + */ + private static boolean lessThanUnsignedLong(long x1, long x2) { + if (UnsafeAccess.littleEndian) { + x1 = Long.reverseBytes(x1); + x2 = Long.reverseBytes(x2); + } + return (x1 + Long.MIN_VALUE) < (x2 + Long.MIN_VALUE); + } + + /* + * Both values are passed exactly as read by Unsafe. On a little-endian platform they must be + * byte-reversed to big-endian before comparing. We do all writes in big-endian format. 
+ */ + private static boolean lessThanUnsignedInt(int x1, int x2) { + if (UnsafeAccess.littleEndian) { + x1 = Integer.reverseBytes(x1); + x2 = Integer.reverseBytes(x2); + } + return (x1 & 0xffffffffL) < (x2 & 0xffffffffL); + } + + /* + * Both values are passed exactly as read by Unsafe. On a little-endian platform they must be + * byte-reversed to big-endian before comparing. We do all writes in big-endian format. + */ + private static boolean lessThanUnsignedShort(short x1, short x2) { + if (UnsafeAccess.littleEndian) { + x1 = Short.reverseBytes(x1); + x2 = Short.reverseBytes(x2); + } + return (x1 & 0xffff) < (x2 & 0xffff); } /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java index 51dd6433fa1..deb9a1a7449 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java @@ -78,7 +78,7 @@ public final class UnsafeAccess { // APIs to read primitive data from a byte[] using Unsafe way /** - * Converts a byte array to a short value. + * Converts a byte array to a short value considering it was written in big-endian format. * @param bytes byte array * @param offset offset into array * @return the short value @@ -92,7 +92,7 @@ public final class UnsafeAccess { } /** - * Converts a byte array to an int value. + * Converts a byte array to an int value considering it was written in big-endian format. * @param bytes byte array * @param offset offset into array * @return the int value @@ -106,7 +106,7 @@ public final class UnsafeAccess { } /** - * Converts a byte array to a long value. + * Converts a byte array to a long value considering it was written in big-endian format. 
* @param bytes byte array * @param offset offset into array * @return the long value @@ -121,7 +121,7 @@ public final class UnsafeAccess { // APIs to write primitive data to a byte[] using Unsafe way /** - * Put a short value out to the specified byte array position. + * Put a short value out to the specified byte array position in big-endian format. * @param bytes the byte array * @param offset position in the array * @param val short to write out @@ -136,7 +136,7 @@ public final class UnsafeAccess { } /** - * Put an int value out to the specified byte array position. + * Put an int value out to the specified byte array position in big-endian format. * @param bytes the byte array * @param offset position in the array * @param val int to write out @@ -151,7 +151,7 @@ public final class UnsafeAccess { } /** - * Put a long value out to the specified byte array position. + * Put a long value out to the specified byte array position in big-endian format. * @param bytes the byte array * @param offset position in the array * @param val long to write out @@ -167,60 +167,87 @@ public final class UnsafeAccess { // APIs to read primitive data from a ByteBuffer using Unsafe way /** - * Reads a short value at the given buffer's offset. + * Reads a short value at the given buffer's offset considering it was written in big-endian + * format. + * * @param buf * @param offset * @return short value at offset */ public static short toShort(ByteBuffer buf, int offset) { - short ret; - if (buf.isDirect()) { - ret = theUnsafe.getShort(((DirectBuffer) buf).address() + offset); - } else { - ret = theUnsafe.getShort(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset); - } if (littleEndian) { - return Short.reverseBytes(ret); + return Short.reverseBytes(getAsShort(buf, offset)); } - return ret; + return getAsShort(buf, offset); } /** - * Reads an int value at the given buffer's offset. + * Reads bytes at the given offset as a short value. 
+ * @param buf + * @param offset + * @return short value at offset + */ + static short getAsShort(ByteBuffer buf, int offset) { + if (buf.isDirect()) { + return theUnsafe.getShort(((DirectBuffer) buf).address() + offset); + } + return theUnsafe.getShort(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset); + } + + /** + * Reads an int value at the given buffer's offset considering it was written in big-endian + * format. + * * @param buf * @param offset * @return int value at offset */ public static int toInt(ByteBuffer buf, int offset) { - int ret; - if (buf.isDirect()) { - ret = theUnsafe.getInt(((DirectBuffer) buf).address() + offset); - } else { - ret = theUnsafe.getInt(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset); - } if (littleEndian) { - return Integer.reverseBytes(ret); + return Integer.reverseBytes(getAsInt(buf, offset)); } - return ret; + return getAsInt(buf, offset); } /** - * Reads a long value at the given buffer's offset. + * Reads bytes at the given offset as an int value. + * @param buf + * @param offset + * @return int value at offset + */ + static int getAsInt(ByteBuffer buf, int offset) { + if (buf.isDirect()) { + return theUnsafe.getInt(((DirectBuffer) buf).address() + offset); + } + return theUnsafe.getInt(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset); + } + + /** + * Reads a long value at the given buffer's offset considering it was written in big-endian + * format. 
+ * * @param buf * @param offset * @return long value at offset */ public static long toLong(ByteBuffer buf, int offset) { - long ret; - if (buf.isDirect()) { - ret = theUnsafe.getLong(((DirectBuffer) buf).address() + offset); - } else { - ret = theUnsafe.getLong(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset); - } if (littleEndian) { - return Long.reverseBytes(ret); + return Long.reverseBytes(getAsLong(buf, offset)); } - return ret; + return getAsLong(buf, offset); + } + + /** + * Reads bytes at the given offset as a long value. + * @param buf + * @param offset + * @return long value at offset + */ + static long getAsLong(ByteBuffer buf, int offset) { + if (buf.isDirect()) { + return theUnsafe.getLong(((DirectBuffer) buf).address() + offset); + } + return theUnsafe.getLong(buf.array(), BYTE_ARRAY_BASE_OFFSET + buf.arrayOffset() + offset); } // APIs to copy data. This will be direct memory location copy and will be much faster