diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/BBKVComparator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/BBKVComparator.java new file mode 100644 index 00000000000..c15d32110ad --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/BBKVComparator.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.util.Comparator; + +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * A comparator for case where {@link ByteBufferKeyValue} is prevalent type (BBKV + * is base-type in hbase2). Takes a general comparator as fallback in case types are NOT the + * expected ByteBufferKeyValue. + * + *

This is a tricked-out Comparator at heart of hbase read and write. It is in + * the HOT path so we try all sorts of ugly stuff so we can go faster. See below + * in this javadoc comment for the list. + * + *

Apply this comparator narrowly so it is fed exclusively ByteBufferKeyValues + * as much as is possible so JIT can settle (e.g. make one per ConcurrentSkipListMap + * in HStore). + * + *

Exploits specially added methods in BBKV to save on deserializations of shorts, + * longs, etc: i.e. calculating the family length requires row length; pass it in + * rather than recalculate it, and so on. + * + *

This comparator does static dispatch to private final methods so hotspot is comfortable + * deciding inline. + * + *

Measurement has it that we almost have it so all inlines from memstore + * ConcurrentSkipListMap on down to the (unsafe) intrinisics that do byte compare + * and deserialize shorts and ints; needs a bit more work. + * + *

Does not take a Type to compare: i.e. it is not a Comparator<Cell> or + * CellComparator<Cell> or Comparator<ByteBufferKeyValue> because that adds + * another method to the hierarchy -- from compare(Object, Object) + * to dynamic compare(Cell, Cell) to static private compare -- and inlining doesn't happen if + * hierarchy is too deep (it is the case here). + * + *

Be careful making changes. Compare perf before and after and look at what + * hotspot ends up generating before committing change (jitwatch is helpful here). + * Changing this one class doubled write throughput (HBASE-20483). + */ +@InterfaceAudience.Private +public class BBKVComparator implements Comparator { + protected static final Logger LOG = LoggerFactory.getLogger(BBKVComparator.class); + private final Comparator fallback; + + public BBKVComparator(Comparator fallback) { + this.fallback = fallback; + } + + @Override + public int compare(Object l, Object r) { + // LOG.info("ltype={} rtype={}", l, r); + if ((l instanceof ByteBufferKeyValue) && (r instanceof ByteBufferKeyValue)) { + return compare((ByteBufferKeyValue)l, (ByteBufferKeyValue)r); + } + // Skip calling compare(Object, Object) and go direct to compare(Cell, Cell) + return this.fallback.compare((Cell)l, (Cell)r); + } + + // TODO: Come back here. We get a few percentage points extra of throughput if this is a + // private method. + static final int compare(ByteBufferKeyValue left, ByteBufferKeyValue right) { + // NOTE: Same method is in CellComparatorImpl, also private, not shared, intentionally. Not + // sharing gets us a few percent more throughput in compares. If changes here or there, make + // sure done in both places. + + // Compare Rows. Cache row length. + int leftRowLength = left.getRowLength(); + int rightRowLength = right.getRowLength(); + int diff = ByteBufferUtils.compareTo(left.getRowByteBuffer(), left.getRowPosition(), + leftRowLength, + right.getRowByteBuffer(), right.getRowPosition(), rightRowLength); + if (diff != 0) { + return diff; + } + + // If the column is not specified, the "minimum" key type appears as latest in the sorted + // order, regardless of the timestamp. This is used for specifying the last key/value in a + // given row, because there is no "lexicographically last column" (it would be infinitely long). + // The "maximum" key type does not need this behavior. Copied from KeyValue. This is bad in that + // we can't do memcmp w/ special rules like this. + // TODO: Is there a test for this behavior? + int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength); + int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition); + int leftKeyLength = left.getKeyLength(); + int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength, + leftFamilyLength); + + // No need of left row length below here. + + byte leftType = left.getTypeByte(leftKeyLength); + if (leftFamilyLength + leftQualifierLength == 0 && + leftType == KeyValue.Type.Minimum.getCode()) { + // left is "bigger", i.e. it appears later in the sorted order + return 1; + } + + int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength); + int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition); + int rightKeyLength = right.getKeyLength(); + int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength, + rightFamilyLength); + + // No need of right row length below here. + + byte rightType = right.getTypeByte(rightKeyLength); + if (rightFamilyLength + rightQualifierLength == 0 && + rightType == KeyValue.Type.Minimum.getCode()) { + return -1; + } + + // Compare families. + int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition); + int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition); + diff = ByteBufferUtils.compareTo(left.getFamilyByteBuffer(), leftFamilyPosition, + leftFamilyLength, + right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength); + if (diff != 0) { + return diff; + } + + // Compare qualifiers + diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(), + left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength, + right.getQualifierByteBuffer(), + right.getQualifierPosition(rightFamilyPosition, rightFamilyLength), + rightQualifierLength); + if (diff != 0) { + return diff; + } + + // Timestamps. + // Swap order we pass into compare so we get DESCENDING order. + diff = Long.compare(right.getTimestamp(rightKeyLength), left.getTimestamp(leftKeyLength)); + if (diff != 0) { + return diff; + } + + // Compare types. Let the delete types sort ahead of puts; i.e. types + // of higher numbers sort before those of lesser numbers. Maximum (255) + // appears ahead of everything, and minimum (0) appears after + // everything. + diff = (0xff & rightType) - (0xff & leftType); + if (diff != 0) { + return diff; + } + + // Negate following comparisons so later edits show up first mvccVersion: later sorts first + return Longs.compare(right.getSequenceId(), left.getSequenceId()); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java index 60be67082bc..3529d54f7df 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java @@ -21,6 +21,7 @@ import java.util.Comparator; import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceStability; + /** * Comparator for comparing cells and has some specialized methods that allows comparing individual * cell components like row, family, qualifier and timestamp @@ -130,4 +131,11 @@ public interface CellComparator extends Comparator { * timestamp 0 if both timestamps are equal */ int compareTimestamps(long leftCellts, long rightCellts); + + /** + * @return A dumbed-down, fast comparator for hbase2 base-type, the {@link ByteBufferKeyValue}. + * Create an instance when you make a new memstore, when you know only BBKVs will be passed. + * Do not pollute with types other than BBKV if can be helped; the Comparator will slow. + */ + Comparator getSimpleComparator(); } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java index 785d8ffb1c3..c2f2ea55d52 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparatorImpl.java @@ -18,15 +18,18 @@ package org.apache.hadoop.hbase; +import java.util.Comparator; + import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceStability; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; + /** * Compare two HBase cells. Do not use this method comparing -ROOT- or @@ -34,9 +37,13 @@ import org.apache.hbase.thirdparty.com.google.common.primitives.Longs; * takes account of the special formatting of the row where we have commas to delimit table from * regionname, from row. See KeyValue for how it has a special comparator to do hbase:meta cells * and yet another for -ROOT-. - * While using this comparator for {{@link #compareRows(Cell, Cell)} et al, the hbase:meta cells + *

While using this comparator for {{@link #compareRows(Cell, Cell)} et al, the hbase:meta cells * format should be taken into consideration, for which the instance of this comparator * should be used. In all other cases the static APIs in this comparator would be enough + *

HOT methods. We spend a good portion of CPU comparing. Anything that makes the compare + * faster will likely manifest at the macro level. See also + * {@link BBKVComparator}. Use it when mostly {@link ByteBufferKeyValue}s. + *

*/ @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="UNKNOWN", @@ -57,21 +64,17 @@ public class CellComparatorImpl implements CellComparator { public static final CellComparatorImpl META_COMPARATOR = new MetaCellComparator(); @Override - public int compare(Cell a, Cell b) { + public final int compare(final Cell a, final Cell b) { return compare(a, b, false); } - /** - * Compare cells. - * @param ignoreSequenceid True if we are to compare the key portion only and ignore - * the sequenceid. Set to false to compare key and consider sequenceid. - * @return 0 if equal, -1 if a < b, and +1 if a > b. - */ @Override public int compare(final Cell a, final Cell b, boolean ignoreSequenceid) { + int diff = 0; + // "Peel off" the most common path. if (a instanceof ByteBufferKeyValue && b instanceof ByteBufferKeyValue) { - diff = compareByteBufferKeyValue((ByteBufferKeyValue)a, (ByteBufferKeyValue)b); + diff = BBKVComparator.compare((ByteBufferKeyValue)a, (ByteBufferKeyValue)b); if (diff != 0) { return diff; } @@ -88,82 +91,7 @@ public class CellComparatorImpl implements CellComparator { } // Negate following comparisons so later edits show up first mvccVersion: later sorts first - return ignoreSequenceid? diff: Longs.compare(b.getSequenceId(), a.getSequenceId()); - } - - /** - * Specialized comparator for the ByteBufferKeyValue type exclusivesly. - * Caches deserialized lengths of rows and families, etc., and reuses them where it can - * (ByteBufferKeyValue has been changed to be amenable to our providing pre-made lengths, etc.) - */ - private static final int compareByteBufferKeyValue(ByteBufferKeyValue left, - ByteBufferKeyValue right) { - // Compare Rows. Cache row length. - int leftRowLength = left.getRowLength(); - int rightRowLength = right.getRowLength(); - int diff = ByteBufferUtils.compareTo( - left.getRowByteBuffer(), left.getRowPosition(), leftRowLength, - right.getRowByteBuffer(), right.getRowPosition(), rightRowLength); - if (diff != 0) { - return diff; - } - - // If the column is not specified, the "minimum" key type appears the - // latest in the sorted order, regardless of the timestamp. This is used - // for specifying the last key/value in a given row, because there is no - // "lexicographically last column" (it would be infinitely long). The - // "maximum" key type does not need this behavior. - // Copied from KeyValue. This is bad in that we can't do memcmp w/ special rules like this. - // I tried to get rid of the above but scanners depend on it. TODO. - int leftFamilyLengthPosition = left.getFamilyLengthPosition(leftRowLength); - int leftFamilyLength = left.getFamilyLength(leftFamilyLengthPosition); - int rightFamilyLengthPosition = right.getFamilyLengthPosition(rightRowLength); - int rightFamilyLength = right.getFamilyLength(rightFamilyLengthPosition); - int leftKeyLength = left.getKeyLength(); - int leftQualifierLength = left.getQualifierLength(leftKeyLength, leftRowLength, - leftFamilyLength); - byte leftType = left.getTypeByte(leftKeyLength); - if (leftFamilyLength + leftQualifierLength == 0 && leftType == Type.Minimum.getCode()) { - // left is "bigger", i.e. it appears later in the sorted order - return 1; - } - int rightKeyLength = right.getKeyLength(); - int rightQualifierLength = right.getQualifierLength(rightKeyLength, rightRowLength, - rightFamilyLength); - byte rightType = right.getTypeByte(rightKeyLength); - if (rightFamilyLength + rightQualifierLength == 0 && rightType == Type.Minimum.getCode()) { - return -1; - } - - // Compare families. - int leftFamilyPosition = left.getFamilyPosition(leftFamilyLengthPosition); - int rightFamilyPosition = right.getFamilyPosition(rightFamilyLengthPosition); - diff = ByteBufferUtils.compareTo( - left.getFamilyByteBuffer(), leftFamilyPosition, leftFamilyLength, - right.getFamilyByteBuffer(), rightFamilyPosition, rightFamilyLength); - if (diff != 0) { - return diff; - } - // Compare qualifiers - diff = ByteBufferUtils.compareTo(left.getQualifierByteBuffer(), - left.getQualifierPosition(leftFamilyPosition, leftFamilyLength), leftQualifierLength, - right.getQualifierByteBuffer(), - right.getQualifierPosition(rightFamilyPosition, rightFamilyLength), - rightQualifierLength); - if (diff != 0) { - return diff; - } - // Timestamps. - diff = compareTimestampsInternal(left.getTimestamp(leftKeyLength), - right.getTimestamp(rightKeyLength)); - if (diff != 0) { - return diff; - } - // Compare types. Let the delete types sort ahead of puts; i.e. types - // of higher numbers sort before those of lesser numbers. Maximum (255) - // appears ahead of everything, and minimum (0) appears after - // everything. - return (0xff & rightType) - (0xff & leftType); + return ignoreSequenceid? diff: Long.compare(b.getSequenceId(), a.getSequenceId()); } /** @@ -254,7 +182,7 @@ public class CellComparatorImpl implements CellComparator { return compareRows(left, left.getRowLength(), right, right.getRowLength()); } - int compareRows(final Cell left, int leftRowLength, final Cell right, int rightRowLength) { + static int compareRows(final Cell left, int leftRowLength, final Cell right, int rightRowLength) { // left and right can be exactly the same at the beginning of a row if (left == right) { return 0; @@ -341,7 +269,7 @@ public class CellComparatorImpl implements CellComparator { return diff; } - diff = compareTimestamps(left, right); + diff = compareTimestamps(left.getTimestamp(), right.getTimestamp()); if (diff != 0) { return diff; } @@ -355,16 +283,12 @@ public class CellComparatorImpl implements CellComparator { @Override public int compareTimestamps(final Cell left, final Cell right) { - return compareTimestampsInternal(left.getTimestamp(), right.getTimestamp()); + return compareTimestamps(left.getTimestamp(), right.getTimestamp()); } @Override public int compareTimestamps(final long ltimestamp, final long rtimestamp) { - return compareTimestampsInternal(ltimestamp, rtimestamp); - } - - private static final int compareTimestampsInternal(final long ltimestamp, final long rtimestamp) { - // Swap the times so sort is newest to oldest, descending. + // Swap order we pass into compare so we get DESCENDING order. return Long.compare(rtimestamp, ltimestamp); } @@ -373,6 +297,7 @@ public class CellComparatorImpl implements CellComparator { * {@link KeyValue}s. */ public static class MetaCellComparator extends CellComparatorImpl { + // TODO: Do we need a ByteBufferKeyValue version of this? @Override public int compareRows(final Cell left, final Cell right) { return compareRows(left.getRowArray(), left.getRowOffset(), left.getRowLength(), @@ -451,5 +376,15 @@ public class CellComparatorImpl implements CellComparator { right, rightFarDelimiter, rlength - (rightFarDelimiter - roffset)); return result; } + + @Override + public Comparator getSimpleComparator() { + return this; + } + } + + @Override + public Comparator getSimpleComparator() { + return new BBKVComparator(this); } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java index 07b0e3f7986..8efe88b8e0c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ExtendedCell.java @@ -30,9 +30,9 @@ import org.apache.yetus.audience.InterfaceAudience; * must implement this. */ @InterfaceAudience.Private -public interface ExtendedCell extends RawCell, HeapSize, Cloneable { - +public interface ExtendedCell extends RawCell, HeapSize { int CELL_NOT_BASED_ON_CHUNK = -1; + /** * Write this cell to an OutputStream in a {@link KeyValue} format. *
KeyValue format
@@ -87,6 +87,13 @@ public interface ExtendedCell extends RawCell, HeapSize, Cloneable { getValueLength(), getTagsLength(), withTags); } + /** + * @return Serialized size (defaults to include tag length). + */ + default int getSerializedSize() { + return getSerializedSize(true); + } + /** * Write this Cell into the given buf's offset in a {@link KeyValue} format. * @param buf The buffer where to write the Cell. @@ -108,7 +115,8 @@ public interface ExtendedCell extends RawCell, HeapSize, Cloneable { /** * Extracts the id of the backing bytebuffer of this cell if it was obtained from fixed sized * chunks as in case of MemstoreLAB - * @return the chunk id if the cell is backed by fixed sized Chunks, else return -1 + * @return the chunk id if the cell is backed by fixed sized Chunks, else return + * {@link #CELL_NOT_BASED_ON_CHUNK}; i.e. -1. */ default int getChunkId() { return CELL_NOT_BASED_ON_CHUNK; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java index 7093b4b2dcf..e47702389f1 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java @@ -24,7 +24,7 @@ import org.apache.hadoop.hbase.util.ClassSize; import org.apache.yetus.audience.InterfaceAudience; @InterfaceAudience.Private -public class IndividualBytesFieldCell implements ExtendedCell { +public class IndividualBytesFieldCell implements ExtendedCell, Cloneable { private static final long FIXED_OVERHEAD = ClassSize.align( // do alignment(padding gap) ClassSize.OBJECT // object header diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java index f3bfbd3fd02..f7f6c0ded46 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -76,7 +76,7 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti * length and actual tag bytes length. */ @InterfaceAudience.Private -public class KeyValue implements ExtendedCell { +public class KeyValue implements ExtendedCell, Cloneable { private static final ArrayList EMPTY_ARRAY_LIST = new ArrayList<>(); private static final Logger LOG = LoggerFactory.getLogger(KeyValue.class); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java index 2246002eea4..11745a4d96d 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java @@ -50,8 +50,7 @@ public final class ByteBufferUtils { public final static int NEXT_BIT_MASK = 1 << 7; @VisibleForTesting final static boolean UNSAFE_AVAIL = UnsafeAvailChecker.isAvailable(); - @VisibleForTesting - final static boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned(); + public final static boolean UNSAFE_UNALIGNED = UnsafeAvailChecker.unaligned(); private ByteBufferUtils() { } @@ -630,6 +629,8 @@ public final class ByteBufferUtils { } public static int compareTo(ByteBuffer buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) { + // NOTE: This method is copied over in BBKVComparator!!!!! For perf reasons. If you make + // changes here, make them there too!!!! if (UNSAFE_UNALIGNED) { long offset1Adj, offset2Adj; Object refObj1 = null, refObj2 = null; @@ -671,9 +672,12 @@ public final class ByteBufferUtils { return compareTo(buf1, o1, l1, buf2, o2, l2) == 0; } + // The below two methods show up in lots of places. Versions of them in commons util and in + // Cassandra. In guava too? They are copied from ByteBufferUtils. They are here as static + // privates. Seems to make code smaller and make Hotspot happier (comes of compares and study + // of compiled code via jitwatch). + public static int compareTo(byte [] buf1, int o1, int l1, ByteBuffer buf2, int o2, int l2) { - // This method is nearly same as the compareTo that follows but hard sharing code given - // byte array and bytebuffer types and this is a hot code path if (UNSAFE_UNALIGNED) { long offset2Adj; Object refObj2 = null; @@ -738,7 +742,7 @@ public final class ByteBufferUtils { long lw = UnsafeAccess.theUnsafe.getLong(obj1, o1 + (long) i); long rw = UnsafeAccess.theUnsafe.getLong(obj2, o2 + (long) i); if (lw != rw) { - if (!UnsafeAccess.littleEndian) { + if (!UnsafeAccess.LITTLE_ENDIAN) { return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 6eb09c11df4..15facea3071 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -1549,7 +1549,7 @@ public class Bytes implements Comparable { long lw = theUnsafe.getLong(buffer1, offset1Adj + i); long rw = theUnsafe.getLong(buffer2, offset2Adj + i); if (lw != rw) { - if(!UnsafeAccess.littleEndian) { + if(!UnsafeAccess.LITTLE_ENDIAN) { return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1; } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java index 486f81beb6d..953ad5b533e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/UnsafeAccess.java @@ -42,7 +42,7 @@ public final class UnsafeAccess { /** The offset to the first element in a byte array. */ public static final long BYTE_ARRAY_BASE_OFFSET; - static final boolean littleEndian = ByteOrder.nativeOrder() + public static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() .equals(ByteOrder.LITTLE_ENDIAN); // This number limits the number of bytes to copy per call to Unsafe's @@ -81,7 +81,7 @@ public final class UnsafeAccess { * @return the short value */ public static short toShort(byte[] bytes, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Short.reverseBytes(theUnsafe.getShort(bytes, offset + BYTE_ARRAY_BASE_OFFSET)); } else { return theUnsafe.getShort(bytes, offset + BYTE_ARRAY_BASE_OFFSET); @@ -95,7 +95,7 @@ public final class UnsafeAccess { * @return the int value */ public static int toInt(byte[] bytes, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Integer.reverseBytes(theUnsafe.getInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET)); } else { return theUnsafe.getInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET); @@ -109,7 +109,7 @@ public final class UnsafeAccess { * @return the long value */ public static long toLong(byte[] bytes, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Long.reverseBytes(theUnsafe.getLong(bytes, offset + BYTE_ARRAY_BASE_OFFSET)); } else { return theUnsafe.getLong(bytes, offset + BYTE_ARRAY_BASE_OFFSET); @@ -125,7 +125,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putShort(byte[] bytes, int offset, short val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Short.reverseBytes(val); } theUnsafe.putShort(bytes, offset + BYTE_ARRAY_BASE_OFFSET, val); @@ -140,7 +140,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putInt(byte[] bytes, int offset, int val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Integer.reverseBytes(val); } theUnsafe.putInt(bytes, offset + BYTE_ARRAY_BASE_OFFSET, val); @@ -155,7 +155,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putLong(byte[] bytes, int offset, long val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Long.reverseBytes(val); } theUnsafe.putLong(bytes, offset + BYTE_ARRAY_BASE_OFFSET, val); @@ -172,7 +172,7 @@ public final class UnsafeAccess { * @return short value at offset */ public static short toShort(ByteBuffer buf, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Short.reverseBytes(getAsShort(buf, offset)); } return getAsShort(buf, offset); @@ -186,7 +186,7 @@ public final class UnsafeAccess { * @return short value at offset */ public static short toShort(Object ref, long offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Short.reverseBytes(theUnsafe.getShort(ref, offset)); } return theUnsafe.getShort(ref, offset); @@ -214,7 +214,7 @@ public final class UnsafeAccess { * @return int value at offset */ public static int toInt(ByteBuffer buf, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Integer.reverseBytes(getAsInt(buf, offset)); } return getAsInt(buf, offset); @@ -228,7 +228,7 @@ public final class UnsafeAccess { * @return int value at offset */ public static int toInt(Object ref, long offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Integer.reverseBytes(theUnsafe.getInt(ref, offset)); } return theUnsafe.getInt(ref, offset); @@ -256,7 +256,7 @@ public final class UnsafeAccess { * @return long value at offset */ public static long toLong(ByteBuffer buf, int offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Long.reverseBytes(getAsLong(buf, offset)); } return getAsLong(buf, offset); @@ -270,7 +270,7 @@ public final class UnsafeAccess { * @return long value at offset */ public static long toLong(Object ref, long offset) { - if (littleEndian) { + if (LITTLE_ENDIAN) { return Long.reverseBytes(theUnsafe.getLong(ref, offset)); } return theUnsafe.getLong(ref, offset); @@ -297,7 +297,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putInt(ByteBuffer buf, int offset, int val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Integer.reverseBytes(val); } if (buf.isDirect()) { @@ -402,7 +402,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putShort(ByteBuffer buf, int offset, short val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Short.reverseBytes(val); } if (buf.isDirect()) { @@ -421,7 +421,7 @@ public final class UnsafeAccess { * @return incremented offset */ public static int putLong(ByteBuffer buf, int offset, long val) { - if (littleEndian) { + if (LITTLE_ENDIAN) { val = Long.reverseBytes(val); } if (buf.isDirect()) { diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java index 706941154d6..6443d84ebd2 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestByteBufferKeyValue.java @@ -24,6 +24,8 @@ import static org.junit.Assert.assertFalse; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.ConcurrentSkipListMap; + import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.testclassification.MiscTests; import org.apache.hadoop.hbase.testclassification.SmallTests; @@ -67,9 +69,23 @@ public class TestByteBufferKeyValue { assertTrue(CellComparatorImpl.COMPARATOR.compare(cell1, cell3) < 0); Cell cell4 = getOffheapCell(row1, Bytes.toBytes("f"), qual2); assertTrue(CellComparatorImpl.COMPARATOR.compare(cell1, cell4) > 0); + BBKVComparator comparator = new BBKVComparator(null); + assertTrue(comparator.compare(cell1, cell2) < 0); + assertTrue(comparator.compare(cell1, cell3) < 0); + assertTrue(comparator.compare(cell1, cell4) > 0); + ByteBuffer buf = ByteBuffer.allocate(row1.length); + ByteBufferUtils.copyFromArrayToBuffer(buf, row1, 0, row1.length); + + ConcurrentSkipListMap map = + new ConcurrentSkipListMap<>(comparator); + map.put((ByteBufferKeyValue)cell1, (ByteBufferKeyValue)cell1); + map.put((ByteBufferKeyValue)cell2, (ByteBufferKeyValue)cell2); + map.put((ByteBufferKeyValue)cell3, (ByteBufferKeyValue)cell3); + map.put((ByteBufferKeyValue)cell1, (ByteBufferKeyValue)cell1); + map.put((ByteBufferKeyValue)cell1, (ByteBufferKeyValue)cell1); } - private Cell getOffheapCell(byte [] row, byte [] family, byte [] qualifier) { + private static Cell getOffheapCell(byte [] row, byte [] family, byte [] qualifier) { KeyValue kvCell = new KeyValue(row, family, qualifier, 0L, Type.Put, row); ByteBuffer buf = ByteBuffer.allocateDirect(kvCell.getBuffer().length); ByteBufferUtils.copyFromArrayToBuffer(buf, kvCell.getBuffer(), 0, kvCell.getBuffer().length); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index 4c9f6f6d659..5bcaa172d1d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -332,8 +332,8 @@ public class HFile { try { ostream.setDropBehind(shouldDropBehind && cacheConf.shouldDropBehindCompaction()); } catch (UnsupportedOperationException uoe) { - if (LOG.isTraceEnabled()) LOG.trace("Unable to set drop behind on " + path, uoe); - else if (LOG.isDebugEnabled()) LOG.debug("Unable to set drop behind on " + path); + LOG.trace("Unable to set drop behind on {}", path, uoe); + LOG.debug("Unable to set drop behind on {}", path.getName()); } } return new HFileWriterImpl(conf, cacheConf, path, ostream, comparator, fileContext); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java index a4fe883668c..94a256d9341 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java @@ -53,7 +53,7 @@ public class CellSet implements NavigableSet { private final int numUniqueKeys; CellSet(final CellComparator c) { - this.delegatee = new ConcurrentSkipListMap<>(c); + this.delegatee = new ConcurrentSkipListMap<>(c.getSimpleComparator()); this.numUniqueKeys = UNKNOWN_NUM_UNIQUES; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java index 5577be47ad2..44ba65f6208 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ChunkCreator.java @@ -46,7 +46,7 @@ import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFacto @InterfaceAudience.Private public class ChunkCreator { private static final Logger LOG = LoggerFactory.getLogger(ChunkCreator.class); - // monotonically increasing chunkid + // monotonically increasing chunkid. Starts at 1. private AtomicInteger chunkID = new AtomicInteger(1); // maps the chunk against the monotonically increasing chunk id. We need to preserve the // natural ordering of the key diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 54a604fe60e..3b57837f17d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -2551,8 +2551,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } MemStoreSize mss = this.memStoreSizing.getMemStoreSize(); LOG.info("Flushing " + + storesToFlush.size() + "/" + stores.size() + " column families," + - " memstore data size=" + StringUtils.byteDesc(mss.getDataSize()) + - " memstore heap size=" + StringUtils.byteDesc(mss.getHeapSize()) + + " dataSize=" + StringUtils.byteDesc(mss.getDataSize()) + + " heapSize=" + StringUtils.byteDesc(mss.getHeapSize()) + ((perCfExtras != null && perCfExtras.length() > 0)? perCfExtras.toString(): "") + ((wal != null) ? "" : "; WAL is null, using passed sequenceid=" + sequenceId)); } @@ -2742,10 +2742,10 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi long time = EnvironmentEdgeManager.currentTime() - startTime; MemStoreSize mss = prepareResult.totalFlushableSize.getMemStoreSize(); long memstoresize = this.memStoreSizing.getMemStoreSize().getDataSize(); - String msg = "Finished memstore flush;" - + " data size ~" + StringUtils.byteDesc(mss.getDataSize()) + "/" + mss.getDataSize() - + ", heap size ~" + StringUtils.byteDesc(mss.getHeapSize()) + "/" + mss.getHeapSize() - + ", currentsize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize + String msg = "Finished flush of" + + " dataSize ~" + StringUtils.byteDesc(mss.getDataSize()) + "/" + mss.getDataSize() + + ", heapSize ~" + StringUtils.byteDesc(mss.getHeapSize()) + "/" + mss.getHeapSize() + + ", currentSize=" + StringUtils.byteDesc(memstoresize) + "/" + memstoresize + " for " + this.getRegionInfo().getEncodedName() + " in " + time + "ms, sequenceid=" + flushOpSeqId + ", compaction requested=" + compactionRequested + ((wal == null) ? "; wal=null" : ""); @@ -4225,7 +4225,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * @param cellItr * @param now */ - public void updateCellTimestamps(final Iterable> cellItr, final byte[] now) + private static void updateCellTimestamps(final Iterable> cellItr, final byte[] now) throws IOException { for (List cells: cellItr) { if (cells == null) continue; @@ -4280,12 +4280,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi requestFlush(); // Don't print current limit because it will vary too much. The message is used as a key // over in RetriesExhaustedWithDetailsException processing. - throw new RegionTooBusyException("Over memstore limit; regionName=" + + throw new RegionTooBusyException("Over memstore limit=" + + org.apache.hadoop.hbase.procedure2.util.StringUtils.humanSize(this.blockingMemStoreSize) + + ", regionName=" + (this.getRegionInfo() == null? "unknown": this.getRegionInfo().getEncodedName()) + - ", server=" + (this.getRegionServerServices() == null ? "unknown": - this.getRegionServerServices().getServerName()) + - ", blockingMemStoreSize=" + - org.apache.hadoop.hbase.procedure2.util.StringUtils.humanSize(blockingMemStoreSize)); + ", server=" + (this.getRegionServerServices() == null? "unknown": + this.getRegionServerServices().getServerName())); } } @@ -8591,4 +8591,5 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi public void requestFlush(FlushLifeCycleTracker tracker) throws IOException { requestFlush0(tracker); } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java index 0db0fd9c285..ac7223f2d8d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MemStoreLABImpl.java @@ -120,12 +120,13 @@ public class MemStoreLABImpl implements MemStoreLAB { */ @Override public Cell forceCopyOfBigCellInto(Cell cell) { - int size = KeyValueUtil.length(cell) + ChunkCreator.SIZEOF_CHUNK_HEADER; + int size = cell instanceof ExtendedCell? ((ExtendedCell)cell).getSerializedSize(): + KeyValueUtil.length(cell); + size += ChunkCreator.SIZEOF_CHUNK_HEADER; Preconditions.checkArgument(size >= 0, "negative size"); if (size <= dataChunkSize) { // Using copyCellInto for cells which are bigger than the original maxAlloc - Cell newCell = copyCellInto(cell, dataChunkSize); - return newCell; + return copyCellInto(cell, dataChunkSize); } else { Chunk c = getNewExternalChunk(size); int allocOffset = c.alloc(size); @@ -134,7 +135,8 @@ public class MemStoreLABImpl implements MemStoreLAB { } private Cell copyCellInto(Cell cell, int maxAlloc) { - int size = KeyValueUtil.length(cell); + int size = cell instanceof ExtendedCell? ((ExtendedCell)cell).getSerializedSize(): + KeyValueUtil.length(cell); Preconditions.checkArgument(size >= 0, "negative size"); // Callers should satisfy large allocations directly from JVM since they // don't cause fragmentation as badly. @@ -169,7 +171,7 @@ public class MemStoreLABImpl implements MemStoreLAB { * Clone the passed cell by copying its data into the passed buf and create a cell with a chunkid * out of it */ - private Cell copyToChunkCell(Cell cell, ByteBuffer buf, int offset, int len) { + private static Cell copyToChunkCell(Cell cell, ByteBuffer buf, int offset, int len) { int tagsLen = cell.getTagsLength(); if (cell instanceof ExtendedCell) { ((ExtendedCell) cell).write(buf, offset); @@ -255,8 +257,7 @@ public class MemStoreLABImpl implements MemStoreLAB { */ private Chunk getOrMakeChunk() { // Try to get the chunk - Chunk c; - c = currChunk.get(); + Chunk c = currChunk.get(); if (c != null) { return c; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java index 7727b108c67..24604d988e8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALFactory.java @@ -47,9 +47,11 @@ import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesti * implementations: *
    *
  • defaultProvider : whatever provider is standard for the hbase version. Currently - * "filesystem"
  • + * "asyncfs" + *
  • asyncfs : a provider that will run on top of an implementation of the Hadoop + * FileSystem interface via an asynchronous client.
  • *
  • filesystem : a provider that will run on top of an implementation of the Hadoop - * FileSystem interface, normally HDFS.
  • + * FileSystem interface via HDFS's synchronous DFSClient. *
  • multiwal : a provider that will use multiple "filesystem" wal instances per region * server.
  • *
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestTagRewriteCell.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestTagRewriteCell.java index dc47661166d..d852d2db2ca 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestTagRewriteCell.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestTagRewriteCell.java @@ -47,9 +47,10 @@ public class TestTagRewriteCell { // VisibilityController and AccessController) Cell trCell2 = PrivateCellUtil.createCell(trCell, new byte[fakeTagArrayLength]); - assertTrue("TagRewriteCell containing a TagRewriteCell's heapsize should be larger than a " + - "single TagRewriteCell's heapsize", trCellHeapSize < ((HeapSize)trCell2).heapSize()); - assertTrue("TagRewriteCell should have had nulled out tags array", ((HeapSize)trCell).heapSize() < - trCellHeapSize); + assertTrue("TagRewriteCell containing a TagRewriteCell's heapsize should be " + + "larger than a single TagRewriteCell's heapsize", + trCellHeapSize < ((HeapSize)trCell2).heapSize()); + assertTrue("TagRewriteCell should have had nulled out tags array", + ((HeapSize)trCell).heapSize() < trCellHeapSize); } }