diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java index 2a60eb74af6..02b448c04ac 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -37,6 +37,8 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Writable; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellComparator; import com.google.common.primitives.Longs; @@ -63,7 +65,7 @@ import com.google.common.primitives.Longs; */ @InterfaceAudience.Public @InterfaceStability.Evolving -public class KeyValue implements Writable, HeapSize { +public class KeyValue implements Cell, Writable, HeapSize { static final Log LOG = LogFactory.getLog(KeyValue.class); // TODO: Group Key-only comparators and operations into a Key class, just // for neatness sake, if can figure what to call it. @@ -261,12 +263,23 @@ public class KeyValue implements Writable, HeapSize { /** Here be dragons **/ // used to achieve atomic operations in the memstore. - public long getMemstoreTS() { + @Override + public long getMvccVersion() { return memstoreTS; } + public void setMvccVersion(long mvccVersion){ + this.memstoreTS = mvccVersion; + } + + @Deprecated + public long getMemstoreTS() { + return getMvccVersion(); + } + + @Deprecated public void setMemstoreTS(long memstoreTS) { - this.memstoreTS = memstoreTS; + setMvccVersion(memstoreTS); } // default value is 0, aka DNC @@ -831,19 +844,21 @@ public class KeyValue implements Writable, HeapSize { value, voffset, vlength); } - // Needed doing 'contains' on List. Only compares the key portion, not the - // value. + /** + * Needed doing 'contains' on List. Only compares the key portion, not the value. + * + * For temporary backwards compatibility with the original KeyValue.equals method, we ignore the + * mvccVersion. + */ + @Override public boolean equals(Object other) { - if (!(other instanceof KeyValue)) { + if (!(other instanceof Cell)) { return false; } - KeyValue kv = (KeyValue)other; - // Comparing bytes should be fine doing equals test. Shouldn't have to - // worry about special .META. comparators doing straight equals. - return Bytes.equals(getBuffer(), getKeyOffset(), getKeyLength(), - kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()); + return CellComparator.equalsIgnoreMvccVersion(this, (Cell)other); } + @Override public int hashCode() { byte[] b = getBuffer(); int start = getOffset(), end = getOffset() + getLength(); @@ -864,6 +879,7 @@ public class KeyValue implements Writable, HeapSize { * Clones a KeyValue. This creates a copy, re-allocating the buffer. 
* @return Fully copied clone of this KeyValue */ + @Override public KeyValue clone() { byte [] b = new byte[this.length]; System.arraycopy(this.bytes, this.offset, b, 0, this.length); @@ -1041,9 +1057,18 @@ public class KeyValue implements Writable, HeapSize { return keyLength; } + /** + * @return the backing array of the entire KeyValue (all KeyValue fields are in a single array) + */ + @Override + public byte[] getValueArray() { + return bytes; + } + /** * @return Value offset */ + @Override public int getValueOffset() { return getKeyOffset() + getKeyLength(); } @@ -1051,13 +1076,23 @@ public class KeyValue implements Writable, HeapSize { /** * @return Value length */ + @Override public int getValueLength() { return Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT); } + /** + * @return the backing array of the entire KeyValue (all KeyValue fields are in a single array) + */ + @Override + public byte[] getRowArray() { + return bytes; + } + /** * @return Row offset */ + @Override public int getRowOffset() { return getKeyOffset() + Bytes.SIZEOF_SHORT; } @@ -1065,13 +1100,23 @@ public class KeyValue implements Writable, HeapSize { /** * @return Row length */ + @Override public short getRowLength() { return Bytes.toShort(this.bytes, getKeyOffset()); } + /** + * @return the backing array of the entire KeyValue (all KeyValue fields are in a single array) + */ + @Override + public byte[] getFamilyArray() { + return bytes; + } + /** * @return Family offset */ + @Override public int getFamilyOffset() { return getFamilyOffset(getRowLength()); } @@ -1086,6 +1131,7 @@ public class KeyValue implements Writable, HeapSize { /** * @return Family length */ + @Override public byte getFamilyLength() { return getFamilyLength(getFamilyOffset()); } @@ -1097,9 +1143,18 @@ public class KeyValue implements Writable, HeapSize { return this.bytes[foffset-1]; } + /** + * @return the backing array of the entire KeyValue (all KeyValue fields are in a single array) + */ + @Override + public byte[] getQualifierArray() { + return bytes; + } + /** * @return Qualifier offset */ + @Override public int getQualifierOffset() { return getQualifierOffset(getFamilyOffset()); } @@ -1114,6 +1169,7 @@ public class KeyValue implements Writable, HeapSize { /** * @return Qualifier length */ + @Override public int getQualifierLength() { return getQualifierLength(getRowLength(),getFamilyLength()); } @@ -1273,6 +1329,7 @@ public class KeyValue implements Writable, HeapSize { * @return Timestamp */ private long timestampCache = -1; + @Override public long getTimestamp() { if (timestampCache == -1) { timestampCache = getTimestamp(getKeyLength()); @@ -1296,6 +1353,14 @@ public class KeyValue implements Writable, HeapSize { return getType(getKeyLength()); } + /** + * @return KeyValue.TYPE byte representation + */ + @Override + public byte getTypeByte() { + return getType(getKeyLength()); + } + /** * @param keylength Pass if you have it to save on a int creation. * @return Type of this KeyValue. @@ -2564,13 +2629,23 @@ public class KeyValue implements Writable, HeapSize { } } - // HeapSize + /** + * HeapSize implementation + * + * We do not count the bytes in the rowCache because it should be empty for a KeyValue in the + * MemStore. 
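As an aside (not part of the patch), a minimal sketch of how a KeyValue can now be handled through the new Cell interface; it assumes only the constructor and Bytes helpers already used elsewhere in this change:

    // Build a KeyValue, then read it back through the Cell accessors introduced above.
    Cell cell = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"), Bytes.toBytes("qual"),
        1234L, KeyValue.Type.Put, Bytes.toBytes("value"));
    long mvcc = cell.getMvccVersion();   // replaces the now-deprecated getMemstoreTS()
    byte[] row = new byte[cell.getRowLength()];
    System.arraycopy(cell.getRowArray(), cell.getRowOffset(), row, 0, cell.getRowLength());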
+ */ + @Override public long heapSize() { - return ClassSize.align(ClassSize.OBJECT + (2 * ClassSize.REFERENCE) + - ClassSize.align(ClassSize.ARRAY) + ClassSize.align(length) + - (3 * Bytes.SIZEOF_INT) + - ClassSize.align(ClassSize.ARRAY) + - (2 * Bytes.SIZEOF_LONG)); + int sum = 0; + sum += ClassSize.OBJECT;// the KeyValue object itself + sum += 2 * ClassSize.REFERENCE;// 2 * pointers to "bytes" and "rowCache" byte[] + sum += 2 * ClassSize.align(ClassSize.ARRAY);// 2 * "bytes" and "rowCache" byte[] + sum += ClassSize.align(length);// number of bytes of data in the "bytes" array + //ignore the data in the rowCache because it is cleared for inactive memstore KeyValues + sum += 3 * Bytes.SIZEOF_INT;// offset, length, keyLength + sum += 2 * Bytes.SIZEOF_LONG;// timestampCache, memstoreTS + return ClassSize.align(sum); } // this overload assumes that the length bytes have already been read, @@ -2587,11 +2662,13 @@ public class KeyValue implements Writable, HeapSize { } // Writable + @Override public void readFields(final DataInput in) throws IOException { int length = in.readInt(); readFields(length, in); } + @Override public void write(final DataOutput out) throws IOException { out.writeInt(this.length); out.write(this.bytes, this.offset, this.length); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTestUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTestUtil.java new file mode 100644 index 00000000000..1beeb4a82ce --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTestUtil.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase; + +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.IterableUtils; +import org.apache.hadoop.hbase.util.Strings; + +import com.google.common.collect.Lists; + +@InterfaceAudience.Private +public class KeyValueTestUtil { + + public static KeyValue create( + String row, + String family, + String qualifier, + long timestamp, + String value) + { + return create(row, family, qualifier, timestamp, KeyValue.Type.Put, value); + } + + public static KeyValue create( + String row, + String family, + String qualifier, + long timestamp, + KeyValue.Type type, + String value) + { + return new KeyValue( + Bytes.toBytes(row), + Bytes.toBytes(family), + Bytes.toBytes(qualifier), + timestamp, + type, + Bytes.toBytes(value) + ); + } + + + public static ByteBuffer toByteBufferAndRewind(final Iterable kvs, + boolean includeMemstoreTS) { + int totalBytes = KeyValueTool.totalLengthWithMvccVersion(kvs, includeMemstoreTS); + ByteBuffer bb = ByteBuffer.allocate(totalBytes); + for (KeyValue kv : IterableUtils.nullSafe(kvs)) { + KeyValueTool.appendToByteBuffer(bb, kv, includeMemstoreTS); + } + bb.rewind(); + return bb; + } + + public static List rewindThenToList(final ByteBuffer bb, + final boolean includesMemstoreTS) { + bb.rewind(); + List kvs = Lists.newArrayList(); + KeyValue kv = null; + while (true) { + kv = KeyValueTool.nextShallowCopy(bb, includesMemstoreTS); + if (kv == null) { + break; + } + kvs.add(kv); + } + return kvs; + } + + + /********************* toString ************************************/ + + public static String toStringWithPadding(final Collection kvs, + final boolean includeMeta) { + int maxRowStringLength = 0; + int maxFamilyStringLength = 0; + int maxQualifierStringLength = 0; + int maxTimestampLength = 0; + for (KeyValue kv : kvs) { + maxRowStringLength = Math.max(maxRowStringLength, getRowString(kv).length()); + maxFamilyStringLength = Math.max(maxFamilyStringLength, getFamilyString(kv).length()); + maxQualifierStringLength = Math.max(maxQualifierStringLength, getQualifierString(kv) + .length()); + maxTimestampLength = Math.max(maxTimestampLength, Long.valueOf(kv.getTimestamp()).toString() + .length()); + } + StringBuilder sb = new StringBuilder(); + for (KeyValue kv : kvs) { + if (sb.length() > 0) { + sb.append("\n"); + } + String row = toStringWithPadding(kv, maxRowStringLength, maxFamilyStringLength, + maxQualifierStringLength, maxTimestampLength, includeMeta); + sb.append(row); + } + return sb.toString(); + } + + protected static String toStringWithPadding(final KeyValue kv, final int maxRowLength, + int maxFamilyLength, int maxQualifierLength, int maxTimestampLength, boolean includeMeta) { + String leadingLengths = ""; + String familyLength = kv.getFamilyLength() + " "; + if (includeMeta) { + leadingLengths += Strings.padFront(kv.getKeyLength() + "", '0', 4); + leadingLengths += " "; + leadingLengths += Strings.padFront(kv.getValueLength() + "", '0', 4); + leadingLengths += " "; + leadingLengths += Strings.padFront(kv.getRowLength() + "", '0', 2); + leadingLengths += " "; + } + int spacesAfterRow = maxRowLength - getRowString(kv).length() + 2; + int spacesAfterFamily = maxFamilyLength - getFamilyString(kv).length() + 2; + int spacesAfterQualifier = maxQualifierLength - getQualifierString(kv).length() + 1; + int 
spacesAfterTimestamp = maxTimestampLength + - Long.valueOf(kv.getTimestamp()).toString().length() + 1; + return leadingLengths + getRowString(kv) + Strings.repeat(' ', spacesAfterRow) + + familyLength + getFamilyString(kv) + Strings.repeat(' ', spacesAfterFamily) + + getQualifierString(kv) + Strings.repeat(' ', spacesAfterQualifier) + + getTimestampString(kv) + Strings.repeat(' ', spacesAfterTimestamp) + + getTypeString(kv) + " " + getValueString(kv); + } + + protected static String getRowString(final KeyValue kv) { + return Bytes.toStringBinary(kv.getRow()); + } + + protected static String getFamilyString(final KeyValue kv) { + return Bytes.toStringBinary(kv.getFamily()); + } + + protected static String getQualifierString(final KeyValue kv) { + return Bytes.toStringBinary(kv.getQualifier()); + } + + protected static String getTimestampString(final KeyValue kv) { + return kv.getTimestamp() + ""; + } + + protected static String getTypeString(final KeyValue kv) { + return KeyValue.Type.codeToType(kv.getType()).toString(); + } + + protected static String getValueString(final KeyValue kv) { + return Bytes.toStringBinary(kv.getValue()); + } + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTool.java new file mode 100644 index 00000000000..a2f75b996c2 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTool.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.IterableUtils; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hbase.Cell; +import org.apache.hbase.cell.CellTool; + +/** + * static convenience methods for dealing with KeyValues and collections of KeyValues + */ +@InterfaceAudience.Private +public class KeyValueTool { + + /**************** length *********************/ + + public static int length(final Cell cell) { + return (int)KeyValue.getKeyValueDataStructureSize(cell.getRowLength(), cell.getFamilyLength(), + cell.getQualifierLength(), cell.getValueLength()); + } + + protected static int keyLength(final Cell cell) { + return (int)KeyValue.getKeyDataStructureSize(cell.getRowLength(), cell.getFamilyLength(), + cell.getQualifierLength()); + } + + public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) { + int length = kv.getLength(); + if (includeMvccVersion) { + length += WritableUtils.getVIntSize(kv.getMvccVersion()); + } + return length; + } + + public static int totalLengthWithMvccVersion(final Iterable kvs, + final boolean includeMvccVersion) { + int length = 0; + for (KeyValue kv : IterableUtils.nullSafe(kvs)) { + length += lengthWithMvccVersion(kv, includeMvccVersion); + } + return length; + } + + + /**************** copy key only *********************/ + + public static KeyValue copyToNewKeyValue(final Cell cell) { + KeyValue kvCell = new KeyValue(copyToNewByteArray(cell)); + kvCell.setMvccVersion(cell.getMvccVersion()); + return kvCell; + } + + public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) { + byte[] bytes = new byte[keyLength(cell)]; + appendKeyToByteArrayWithoutValue(cell, bytes, 0); + ByteBuffer buffer = ByteBuffer.wrap(bytes); + buffer.position(buffer.limit());//make it look as if each field were appended + return buffer; + } + + public static byte[] copyToNewByteArray(final Cell cell) { + int v1Length = length(cell); + byte[] backingBytes = new byte[v1Length]; + appendToByteArray(cell, backingBytes, 0); + return backingBytes; + } + + protected static int appendKeyToByteArrayWithoutValue(final Cell cell, final byte[] output, + final int offset) { + int nextOffset = offset; + nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength()); + nextOffset = CellTool.copyRowTo(cell, output, nextOffset); + nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength()); + nextOffset = CellTool.copyFamilyTo(cell, output, nextOffset); + nextOffset = CellTool.copyQualifierTo(cell, output, nextOffset); + nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp()); + nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte()); + return nextOffset; + } + + + /**************** copy key and value *********************/ + + public static int appendToByteArray(final Cell cell, final byte[] output, final int offset) { + int pos = offset; + pos = Bytes.putInt(output, pos, keyLength(cell)); + pos = Bytes.putInt(output, pos, cell.getValueLength()); + pos = appendKeyToByteArrayWithoutValue(cell, output, pos); + CellTool.copyValueTo(cell, output, pos); + return pos + cell.getValueLength(); + } + + public static ByteBuffer copyToNewByteBuffer(final Cell cell) { + byte[] bytes = 
new byte[length(cell)]; + appendToByteArray(cell, bytes, 0); + ByteBuffer buffer = ByteBuffer.wrap(bytes); + buffer.position(buffer.limit());//make it look as if each field were appended + return buffer; + } + + public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv, + final boolean includeMvccVersion) { + // keep pushing the limit out. assume enough capacity + bb.limit(bb.position() + kv.getLength()); + bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength()); + if (includeMvccVersion) { + int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getMvccVersion()); + ByteBufferUtils.extendLimit(bb, numMvccVersionBytes); + ByteBufferUtils.writeVLong(bb, kv.getMvccVersion()); + } + } + + + /**************** iterating *******************************/ + + /** + * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's + * position to the start of the next KeyValue. Does not allocate a new array or copy data. + */ + public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion) { + if (bb.isDirect()) { + throw new IllegalArgumentException("only supports heap buffers"); + } + if (bb.remaining() < 1) { + return null; + } + int underlyingArrayOffset = bb.arrayOffset() + bb.position(); + int keyLength = bb.getInt(); + int valueLength = bb.getInt(); + int kvLength = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keyLength + valueLength; + KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); + ByteBufferUtils.skip(bb, keyLength + valueLength); + if (includesMvccVersion) { + long mvccVersion = ByteBufferUtils.readVLong(bb); + keyValue.setMvccVersion(mvccVersion); + } + return keyValue; + } + + + /*************** next/previous **********************************/ + + /** + * Append single byte 0x00 to the end of the input row key + */ + public static KeyValue createFirstKeyInNextRow(final Cell in){ + byte[] nextRow = new byte[in.getRowLength() + 1]; + System.arraycopy(in.getRowArray(), in.getRowOffset(), nextRow, 0, in.getRowLength()); + nextRow[nextRow.length - 1] = 0;//maybe not necessary + return KeyValue.createFirstOnRow(nextRow); + } + + /** + * Increment the row bytes and clear the other fields + */ + public static KeyValue createFirstKeyInIncrementedRow(final Cell in){ + byte[] thisRow = new ByteRange(in.getRowArray(), in.getRowOffset(), in.getRowLength()) + .deepCopyToNewArray(); + byte[] nextRow = Bytes.unsignedCopyAndIncrement(thisRow); + return KeyValue.createFirstOnRow(nextRow); + } + + /** + * Decrement the timestamp. For tests (currently wasteful) + * + * Remember timestamps are sorted reverse chronologically. + * @param in + * @return + */ + public static KeyValue previousKey(final KeyValue in) { + return KeyValue.createFirstOnRow(CellTool.getRowArray(in), CellTool.getFamilyArray(in), + CellTool.getQualifierArray(in), in.getTimestamp() - 1); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java index cedd6bea831..b2ce35ada60 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java @@ -162,7 +162,7 @@ public interface DataBlockEncoder { */ public ByteBuffer getValueShallowCopy(); - /** @return key value at current position. 
*/ + /** @return key value at current position with position set to limit */ public ByteBuffer getKeyValueBuffer(); /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java similarity index 97% rename from hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java rename to hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java index 9c842e82dc2..475fe1751de 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java @@ -24,7 +24,6 @@ import java.nio.ByteBuffer; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; -import org.apache.hadoop.hbase.io.hfile.HFileBlock; /** * A default implementation of {@link HFileBlockDecodingContext}. It assumes the diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java similarity index 92% rename from hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java rename to hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java index ae00575a89b..e7164bc09a0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java @@ -25,10 +25,11 @@ import java.io.IOException; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.BlockType; -import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.io.compress.Compressor; +import com.google.common.base.Preconditions; + /** * A default implementation of {@link HFileBlockEncodingContext}. It will * compress the data section as one continuous buffer. @@ -85,28 +86,15 @@ public class HFileBlockDefaultEncodingContext implements + compressionAlgorithm, e); } } - if (headerBytes == null) { - dummyHeader = HFileBlock.DUMMY_HEADER; - } else { - dummyHeader = headerBytes; - } - } - - /** - * @param compressionAlgorithm compression algorithm - * @param encoding encoding - */ - public HFileBlockDefaultEncodingContext( - Compression.Algorithm compressionAlgorithm, - DataBlockEncoding encoding) { - this(compressionAlgorithm, encoding, null); + dummyHeader = Preconditions.checkNotNull(headerBytes, + "Please pass HFileBlock.DUMMY_HEADER instead of null for param headerBytes"); } /** * prepare to start a new encoding. 
* @throws IOException */ - void prepareEncoding() throws IOException { + public void prepareEncoding() throws IOException { encodedStream.reset(); dataOut.write(dummyHeader); if (encodingAlgo != null diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java index 55ba2334f4b..0072f5d9110 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java @@ -17,6 +17,7 @@ package org.apache.hadoop.hbase.io.encoding; import java.io.IOException; +import java.io.OutputStream; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.BlockType; @@ -30,6 +31,11 @@ import org.apache.hadoop.hbase.io.hfile.BlockType; */ public interface HFileBlockEncodingContext { + /** + * @return OutputStream to which encoded data is written + */ + public OutputStream getOutputStreamForEncoder(); + /** * @return encoded and compressed bytes with header which are ready to write * out to disk diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ArrayUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ArrayUtils.java new file mode 100644 index 00000000000..eae0049b114 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ArrayUtils.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.util; + +import java.util.ArrayList; +import java.util.Arrays; + +public class ArrayUtils { + + public static int length(byte[] a) { + if (a == null) { + return 0; + } + return a.length; + } + + public static int length(long[] a) { + if (a == null) { + return 0; + } + return a.length; + } + + public static int length(Object[] a) { + if (a == null) { + return 0; + } + return a.length; + } + + public static boolean isEmpty(byte[] a) { + if (a == null) { + return true; + } + if (a.length == 0) { + return true; + } + return false; + } + + public static boolean isEmpty(long[] a) { + if (a == null) { + return true; + } + if (a.length == 0) { + return true; + } + return false; + } + + public static boolean isEmpty(Object[] a) { + if (a == null) { + return true; + } + if (a.length == 0) { + return true; + } + return false; + } + + public static long getFirst(long[] a) { + return a[0]; + } + + public static long getLast(long[] a) { + return a[a.length - 1]; + } + + public static int getTotalLengthOfArrays(Iterable arrays) { + if (arrays == null) { + return 0; + } + int length = 0; + for (byte[] bytes : arrays) { + length += length(bytes); + } + return length; + } + + public static ArrayList toList(long[] array){ + int length = length(array); + ArrayList list = new ArrayList(length); + for(int i=0; i < length; ++i){ + list.add(array[i]); + } + return list; + } + + public static byte[] growIfNecessary(byte[] array, int minLength, int numAdditionalBytes) { + if(array.length >= minLength){ + return array; + } + return Arrays.copyOf(array, minLength + numAdditionalBytes); + } + + public static int[] growIfNecessary(int[] array, int minLength, int numAdditionalInts) { + if(array.length >= minLength){ + return array; + } + return Arrays.copyOf(array, minLength + numAdditionalInts); + } + + public static long[] growIfNecessary(long[] array, int minLength, int numAdditionalLongs) { + if(array.length >= minLength){ + return array; + } + return Arrays.copyOf(array, minLength + numAdditionalLongs); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java similarity index 90% rename from hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java rename to hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java index 21481bf3b5b..a47a2ae2002 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteBufferUtils.java @@ -16,6 +16,7 @@ */ package org.apache.hadoop.hbase.util; +import java.io.ByteArrayOutputStream; import java.io.DataInput; import java.io.DataInputStream; import java.io.IOException; @@ -25,8 +26,7 @@ import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.hbase.io.encoding. - EncoderBufferTooSmallException; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.WritableUtils; /** @@ -299,23 +299,6 @@ public final class ByteBufferUtils { return tmpLength; } - /** - * Asserts that there is at least the given amount of unfilled space - * remaining in the given buffer. - * @param out typically, the buffer we are writing to - * @param length the required space in the buffer - * @throws EncoderBufferTooSmallException If there are no enough bytes. 
- */ - public static void ensureSpace(ByteBuffer out, int length) - throws EncoderBufferTooSmallException { - if (out.position() + length > out.limit()) { - throw new EncoderBufferTooSmallException( - "Buffer position=" + out.position() + - ", buffer limit=" + out.limit() + - ", length to be written=" + length); - } - } - /** * Copy the given number of bytes from the given stream and put it at the * current position of the given buffer, updating the position in the buffer. @@ -335,6 +318,17 @@ public final class ByteBufferUtils { } } } + + /** + * Copy from the InputStream to a new heap ByteBuffer until the InputStream is exhausted. + */ + public static ByteBuffer drainInputStreamToBuffer(InputStream is) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(4096); + IOUtils.copyBytes(is, baos, 4096, true); + ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray()); + buffer.rewind(); + return buffer; + } /** * Copy from one buffer to another from given offset @@ -440,4 +434,24 @@ public final class ByteBufferUtils { buffer.position(buffer.position() + length); } + public static void extendLimit(ByteBuffer buffer, int numBytes) { + buffer.limit(buffer.limit() + numBytes); + } + + /** + * Copy the bytes from position to limit into a new byte[] of the exact length and sets the + * position and limit back to their original values (though not thread safe). + * @param buffer copy from here + * @param startPosition put buffer.get(startPosition) into byte[0] + * @return a new byte[] containing the bytes in the specified range + */ + public static byte[] toBytes(ByteBuffer buffer, int startPosition) { + int originalPosition = buffer.position(); + byte[] output = new byte[buffer.limit() - startPosition]; + buffer.position(startPosition); + buffer.get(output); + buffer.position(originalPosition); + return output; + } + } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRange.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRange.java new file mode 100644 index 00000000000..df3d29c5ba8 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRange.java @@ -0,0 +1,301 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + + + + +/** + * Lightweight, reusable class for specifying ranges of byte[]'s. CompareTo and equals methods are + * lexicographic, which is native to HBase. + *

+ * This class differs from ByteBuffer:
+ * <li/>On-heap bytes only
+ * <li/>Implements equals, hashCode, and compareTo so that it can be used in standard java
+ * Collections, similar to String.
+ * <li/>Does not maintain mark/position iterator state inside the class. Doing so leads to many bugs
+ * in complex applications.
+ * <li/>Allows the addition of simple core methods like this.copyTo(that, offset).
+ * <li/>Can be reused in tight loops like a major compaction which can save significant amounts of
+ * garbage.
+ * <li/>(Without reuse, we throw off garbage like this thing:
+ * http://www.youtube.com/watch?v=lkmBH-MjZF4
+ * <p/>
+ * Mutable, and always evaluates equals, hashCode, and compareTo based on the current contents.
+ * <p/>
+ * Can contain convenience methods for comparing, printing, cloning, spawning new arrays, copying to
+ * other arrays, etc. Please place non-core methods into {@link ByteRangeTool}.
+ * <p/>
    + * We may consider converting this to an interface and creating separate implementations for a + * single byte[], a paged byte[] (growable byte[][]), a ByteBuffer, etc + */ +public class ByteRange implements Comparable { + + private static final int UNSET_HASH_VALUE = -1; + + + /********************** fields *****************************/ + + // Do not make these final, as the intention is to reuse objects of this class + + /** + * The array containing the bytes in this range. It will be >= length. + */ + private byte[] bytes; + + /** + * The index of the first byte in this range. ByteRange.get(0) will return bytes[offset]. + */ + private int offset; + + /** + * The number of bytes in the range. Offset + length must be <= bytes.length + */ + private int length; + + /** + * Variable for lazy-caching the hashCode of this range. Useful for frequently used ranges, + * long-lived ranges, or long ranges. + */ + private int hash = UNSET_HASH_VALUE; + + + /********************** construct ***********************/ + + public ByteRange() { + set(new byte[0]);//Could probably get away with a null array if the need arises. + } + + public ByteRange(byte[] bytes) { + set(bytes); + } + + public ByteRange(byte[] bytes, int offset, int length) { + set(bytes, offset, length); + } + + + /********************** write methods *************************/ + + public ByteRange clear() { + clearHashCache(); + bytes = null; + offset = 0; + length = 0; + return this; + } + + public ByteRange set(byte[] bytes) { + clearHashCache(); + this.bytes = bytes; + this.offset = 0; + this.length = ArrayUtils.length(bytes); + return this; + } + + public ByteRange set(byte[] bytes, int offset, int length) { + clearHashCache(); + this.bytes = bytes; + this.offset = offset; + this.length = length; + return this; + } + + public void setLength(int length) { + clearHashCache(); + this.length = length; + } + + + /*********** read methods (add non-core methods to ByteRangeUtils) *************/ + + /** + * @param index zero-based index + * @return single byte at index + */ + public byte get(int index) { + return bytes[offset + index]; + } + + /** + * Instantiate a new byte[] with exact length, which is at least 24 bytes + length. Copy the + * contents of this range into it. + * @return The newly cloned byte[]. + */ + public byte[] deepCopyToNewArray() { + byte[] result = new byte[length]; + System.arraycopy(bytes, offset, result, 0, length); + return result; + } + + /** + * Create a new ByteRange with new backing byte[] and copy the state of this range into the new + * range. Copy the hash over if it is already calculated. + * @return + */ + public ByteRange deepCopy() { + ByteRange clone = new ByteRange(deepCopyToNewArray()); + if (isHashCached()) { + clone.hash = hash; + } + return clone; + } + + /** + * Wrapper for System.arraycopy. Copy the contents of this range into the provided array. + * @param destination Copy to this array + * @param destinationOffset First index in the destination array. + * @return void to avoid confusion between which ByteRange should be returned + */ + public void deepCopyTo(byte[] destination, int destinationOffset) { + System.arraycopy(bytes, offset, destination, destinationOffset, length); + } + + /** + * Wrapper for System.arraycopy. Copy the contents of this range into the provided array. + * @param innerOffset Start copying from this index in this source ByteRange. 
First byte copied is + * bytes[offset + innerOffset] + * @param copyLength Copy this many bytes + * @param destination Copy to this array + * @param destinationOffset First index in the destination array. + * @return void to avoid confusion between which ByteRange should be returned + */ + public void deepCopySubRangeTo(int innerOffset, int copyLength, byte[] destination, + int destinationOffset) { + System.arraycopy(bytes, offset + innerOffset, destination, destinationOffset, copyLength); + } + + /** + * Create a new ByteRange that points at this range's byte[]. The new range can have different + * values for offset and length, but modifying the shallowCopy will modify the bytes in this + * range's array. Pass over the hash code if it is already cached. + * @param innerOffset First byte of clone will be this.offset + copyOffset. + * @param copyLength Number of bytes in the clone. + * @return new ByteRange object referencing this range's byte[]. + */ + public ByteRange shallowCopySubRange(int innerOffset, int copyLength) { + ByteRange clone = new ByteRange(bytes, offset + innerOffset, copyLength); + if (isHashCached()) { + clone.hash = hash; + } + return clone; + } + + //TODO move to ByteRangeUtils because it is non-core method + public int numEqualPrefixBytes(ByteRange that, int thatInnerOffset) { + int maxCompares = Math.min(length, that.length - thatInnerOffset); + for (int i = 0; i < maxCompares; ++i) { + if (bytes[offset + i] != that.bytes[that.offset + thatInnerOffset + i]) { + return i; + } + } + return maxCompares; + } + + public byte[] getBytes() { + return bytes; + } + + public int getOffset() { + return offset; + } + + public int getLength() { + return length; + } + + public boolean isEmpty(){ + return isEmpty(this); + } + + public boolean notEmpty(){ + return notEmpty(this); + } + + + /******************* static methods ************************/ + + public static boolean isEmpty(ByteRange range){ + return range == null || range.length == 0; + } + + public static boolean notEmpty(ByteRange range){ + return range != null && range.length > 0; + } + + /******************* standard methods *********************/ + + @Override + public boolean equals(Object thatObject) { + if (thatObject == null){ + return false; + } + if (this == thatObject) { + return true; + } + if (hashCode() != thatObject.hashCode()) { + return false; + } + if (!(thatObject instanceof ByteRange)) { + return false; + } + ByteRange that = (ByteRange) thatObject; + return Bytes.equals(bytes, offset, length, that.bytes, that.offset, that.length); + } + + @Override + public int hashCode() { + if (isHashCached()) {// hash is already calculated and cached + return hash; + } + if (this.isEmpty()) {// return 0 for empty ByteRange + hash = 0; + return hash; + } + int off = offset; + hash = 0; + for (int i = 0; i < length; i++) { + hash = 31 * hash + bytes[off++]; + } + return hash; + } + + private boolean isHashCached() { + return hash != UNSET_HASH_VALUE; + } + + private void clearHashCache() { + hash = UNSET_HASH_VALUE; + } + + /** + * Bitwise comparison of each byte in the array. Unsigned comparison, not paying attention to + * java's signed bytes. 
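As an aside (not part of the patch), a minimal sketch of the reuse pattern the class comment describes; the input list and the consume() callback are hypothetical:

    ByteRange range = new ByteRange();           // allocate once, outside the loop
    for (byte[] array : arrays) {                // 'arrays' is a hypothetical List<byte[]>
      range.set(array);                          // reuse the same object; set() clears the hash cache
      if (range.notEmpty() && range.get(0) == 0x00) {
        consume(range.deepCopyToNewArray());     // 'consume' is a hypothetical callback
      }
    }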
+ */ + @Override + public int compareTo(ByteRange other) { + return Bytes.compareTo(bytes, offset, length, other.bytes, other.offset, other.length); + } + + @Override + public String toString() { + return Bytes.toStringBinary(bytes, offset, length); + } + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java new file mode 100644 index 00000000000..5dfe86bba0f --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ByteRangeTool.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import java.util.ArrayList; +import java.util.Collection; + +import com.google.common.collect.Lists; + +/** + * Utility methods {@link ByteRange}. + */ +public class ByteRangeTool { + + public static ArrayList copyToNewArrays(Collection ranges) { + if (ranges == null) { + return new ArrayList(0); + } + ArrayList arrays = Lists.newArrayListWithCapacity(ranges.size()); + for (ByteRange range : ranges) { + arrays.add(range.deepCopyToNewArray()); + } + return arrays; + } + + public static ArrayList fromArrays(Collection arrays) { + if (arrays == null) { + return new ArrayList(0); + } + ArrayList ranges = Lists.newArrayListWithCapacity(arrays.size()); + for (byte[] array : arrays) { + ranges.add(new ByteRange(array)); + } + return ranges; + } + +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index a6e2e696b70..015c9eb8d7e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -1647,7 +1647,7 @@ public class Bytes { return toString(b, 0, n); } - + /** * Copy the byte array given in parameter and return an instance * of a new byte array with the same length and the same content. @@ -1660,4 +1660,62 @@ public class Bytes { System.arraycopy(bytes, 0, result, 0, bytes.length); return result; } + + /** + * Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from + * somewhere. (mcorgan) + * @param a Array to search. Entries must be sorted and unique. + * @param fromIndex First index inclusive of "a" to include in the search. + * @param toIndex Last index exclusive of "a" to include in the search. + * @param key The byte to search for. + * @return The index of key if found. If not found, return -(index + 1), where negative indicates + * "not found" and the "index + 1" handles the "-0" case. 
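As an aside (not part of the patch), a worked example of the return-value convention described above; a negative result encodes the insertion point as -(insertionPoint + 1):

    byte[] sorted = { 1, 5, 9 };                       // must be sorted and contain unique values
    int pos = Bytes.unsignedBinarySearch(sorted, 0, sorted.length, (byte) 7);
    int insertionPoint = pos >= 0 ? pos : -(pos + 1);  // pos is -3 here, so insertionPoint is 2
    // 7 is absent, so it would be inserted before the 9 at index 2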
+ */ + public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) { + int unsignedKey = key & 0xff; + int low = fromIndex; + int high = toIndex - 1; + + while (low <= high) { + int mid = (low + high) >>> 1; + int midVal = a[mid] & 0xff; + + if (midVal < unsignedKey) { + low = mid + 1; + } else if (midVal > unsignedKey) { + high = mid - 1; + } else { + return mid; // key found + } + } + return -(low + 1); // key not found. + } + + /** + * Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding + * 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes. + * + * @param input The byte[] to increment. + * @return The incremented copy of "in". May be same length or 1 byte longer. + */ + public static byte[] unsignedCopyAndIncrement(final byte[] input) { + byte[] copy = copy(input); + if (copy == null) { + throw new IllegalArgumentException("cannot increment null array"); + } + for (int i = copy.length - 1; i >= 0; --i) { + if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum + copy[i] = 0; + } else { + ++copy[i]; + return copy; + } + } + // we maxed out the array + byte[] out = new byte[copy.length + 1]; + out[0] = 1; + System.arraycopy(copy, 0, out, 1, copy.length); + return out; + } + } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CollectionUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CollectionUtils.java new file mode 100644 index 00000000000..d3c32c3ede3 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/CollectionUtils.java @@ -0,0 +1,104 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +/** + * Utility methods for dealing with Collections, including treating null collections as empty. 
+ */ +public class CollectionUtils { + + private static final List EMPTY_LIST = Collections.unmodifiableList( + new ArrayList(0)); + + + @SuppressWarnings("unchecked") + public static Collection nullSafe(Collection in) { + if (in == null) { + return (Collection)EMPTY_LIST; + } + return in; + } + + /************************ size ************************************/ + + public static int nullSafeSize(Collection collection) { + if (collection == null) { + return 0; + } + return collection.size(); + } + + public static boolean nullSafeSameSize(Collection a, Collection b) { + return nullSafeSize(a) == nullSafeSize(b); + } + + /*************************** empty ****************************************/ + + public static boolean isEmpty(Collection collection) { + return collection == null || collection.isEmpty(); + } + + public static boolean notEmpty(Collection collection) { + return !isEmpty(collection); + } + + /************************ first/last **************************/ + + public static T getFirst(Collection collection) { + if (CollectionUtils.isEmpty(collection)) { + return null; + } + for (T t : collection) { + return t; + } + return null; + } + + /** + * @param list any list + * @return -1 if list is empty, otherwise the max index + */ + public static int getLastIndex(List list){ + if(isEmpty(list)){ + return -1; + } + return list.size() - 1; + } + + /** + * @param list + * @param index the index in question + * @return true if it is the last index or if list is empty and -1 is passed for the index param + */ + public static boolean isLastIndex(List list, int index){ + return index == getLastIndex(list); + } + + public static T getLast(List list) { + if (isEmpty(list)) { + return null; + } + return list.get(list.size() - 1); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/KeyValueTestUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IterableUtils.java similarity index 53% rename from hbase-server/src/test/java/org/apache/hadoop/hbase/KeyValueTestUtil.java rename to hbase-common/src/main/java/org/apache/hadoop/hbase/util/IterableUtils.java index af9032bf96f..c0559371a3d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/KeyValueTestUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/IterableUtils.java @@ -1,5 +1,4 @@ /* - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -17,37 +16,26 @@ * limitations under the License. */ -package org.apache.hadoop.hbase; +package org.apache.hadoop.hbase.util; -import org.apache.hadoop.hbase.util.Bytes; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; -public class KeyValueTestUtil { +/** + * Utility methods for Iterable including null-safe handlers. 
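As an aside (not part of the patch), a minimal sketch of the null-safe pattern both utility classes support, assuming the generic signatures of the original source rather than the raw types shown here:

    List<KeyValue> maybeNull = null;
    for (KeyValue kv : IterableUtils.nullSafe(maybeNull)) {
      // never entered when the input is null, so no explicit null check is needed
    }
    int size = CollectionUtils.nullSafeSize(maybeNull);    // 0 for a null collection
    KeyValue first = CollectionUtils.getFirst(maybeNull);  // null for a null or empty collection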
+ */ +public class IterableUtils { - public static KeyValue create( - String row, - String family, - String qualifier, - long timestamp, - String value) - { - return create(row, family, qualifier, timestamp, KeyValue.Type.Put, value); + private static final List EMPTY_LIST = Collections + .unmodifiableList(new ArrayList(0)); + + @SuppressWarnings("unchecked") + public static Iterable nullSafe(Iterable in) { + if (in == null) { + return (List) EMPTY_LIST; + } + return in; } - public static KeyValue create( - String row, - String family, - String qualifier, - long timestamp, - KeyValue.Type type, - String value) - { - return new KeyValue( - Bytes.toBytes(row), - Bytes.toBytes(family), - Bytes.toBytes(qualifier), - timestamp, - type, - Bytes.toBytes(value) - ); - } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/Strings.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Strings.java similarity index 70% rename from hbase-server/src/main/java/org/apache/hadoop/hbase/util/Strings.java rename to hbase-common/src/main/java/org/apache/hadoop/hbase/util/Strings.java index 5ab02228829..3edb76c81e6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/Strings.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Strings.java @@ -76,4 +76,41 @@ public class Strings { return null; return dnPtr.endsWith(".") ? dnPtr.substring(0, dnPtr.length()-1) : dnPtr; } + + /** + * Null-safe length check. + * @param input + * @return true if null or length==0 + */ + public static boolean isEmpty(String input) { + return input == null || input.length() == 0; + } + + /** + * Push the input string to the right by appending a character before it, usually a space. + * @param input the string to pad + * @param padding the character to repeat to the left of the input string + * @param length the desired total length including the padding + * @return padding characters + input + */ + public static String padFront(String input, char padding, int length) { + if (input.length() > length) { + throw new IllegalArgumentException("input \"" + input + "\" longer than maxLength=" + length); + } + int numPaddingCharacters = length - input.length(); + return repeat(padding, numPaddingCharacters) + input; + } + + /** + * @param c repeat this character + * @param reapeatFor the length of the output String + * @return c, repeated repeatFor times + */ + public static String repeat(char c, int reapeatFor) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < reapeatFor; ++i) { + sb.append(c); + } + return sb.toString(); + } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/LoadTestKVGenerator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/LoadTestKVGenerator.java index 260cbaab764..63b1aa47576 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/LoadTestKVGenerator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/LoadTestKVGenerator.java @@ -80,8 +80,10 @@ public class LoadTestKVGenerator { */ public byte[] generateRandomSizeValue(long key, String qual) { String rowKey = md5PrefixedKey(key); - int dataSize = minValueSize + randomForValueSize.nextInt( - Math.abs(maxValueSize - minValueSize)); + int dataSize = minValueSize; + if(minValueSize != maxValueSize){ + dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize)); + } return getValueForRowColumn(rowKey, qual, dataSize); } diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java similarity index 99% rename from hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java rename to hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java index 07dcb630ad0..51932e056b6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/RedundantKVGenerator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java @@ -14,7 +14,7 @@ * License for the specific language governing permissions and limitations * under the License. */ -package org.apache.hadoop.hbase.io.encoding; +package org.apache.hadoop.hbase.util.test; import java.nio.ByteBuffer; import java.util.ArrayList; diff --git a/hbase-common/src/main/java/org/apache/hbase/Cell.java b/hbase-common/src/main/java/org/apache/hbase/Cell.java new file mode 100644 index 00000000000..8247babe008 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/Cell.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hbase.cell.CellTool; + + +/** + * The unit of storage in HBase consisting of the following fields:
+ * <pre>
+ * 1) row
+ * 2) column family
+ * 3) column qualifier
+ * 4) timestamp
+ * 5) type
+ * 6) MVCC version
+ * 7) value
+ * </pre>
+ * <p/>
    + * Uniqueness is determined by the combination of row, column family, column qualifier, + * timestamp, and type. + *

    + * The natural comparator will perform a bitwise comparison on row, column family, and column + * qualifier. Less intuitively, it will then treat the greater timestamp as the lesser value with + * the goal of sorting newer cells first. + *

    + * This interface does not include methods that allocate new byte[]'s such as those used in client + * or debugging code. These should be placed in a sub-interface or the {@link CellTool} class. + *

    + * Cell implements Comparable which is only meaningful when comparing to other keys in the + * same table. It uses {@link #CellComparator} which does not work on the -ROOT- and .META. tables. + *

+ * In the future, we may consider adding a boolean isOnHeap() method and a getValueBuffer() method
+ * that can be used to pass a value directly from an off-heap ByteBuffer to the network without
+ * copying into an on-heap byte[].
+ *
+ * Historic note: the original Cell implementation (KeyValue) requires that all fields be encoded as
+ * consecutive bytes in the same byte[], whereas this interface allows fields to reside in separate
+ * byte[]'s.
+ *
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Evolving
+public interface Cell {
+
+  //1) Row
+
+  /**
+   * Contiguous raw bytes that may start at any index in the containing array. Max length is
+   * Short.MAX_VALUE which is 32,767 bytes.
+   * @return The array containing the row bytes.
+   */
+  byte[] getRowArray();
+
+  /**
+   * @return Array index of first row byte
+   */
+  int getRowOffset();
+
+  /**
+   * @return Number of row bytes. Must be < rowArray.length - offset.
+   */
+  short getRowLength();
+
+
+  //2) Family
+
+  /**
+   * Contiguous bytes composed of legal HDFS filename characters which may start at any index in the
+   * containing array. Max length is Byte.MAX_VALUE, which is 127 bytes.
+   * @return the array containing the family bytes.
+   */
+  byte[] getFamilyArray();
+
+  /**
+   * @return Array index of first family byte
+   */
+  int getFamilyOffset();
+
+  /**
+   * @return Number of family bytes. Must be < familyArray.length - offset.
+   */
+  byte getFamilyLength();
+
+
+  //3) Qualifier
+
+  /**
+   * Contiguous raw bytes that may start at any index in the containing array. Max length is
+   * Short.MAX_VALUE which is 32,767 bytes.
+   * @return The array containing the qualifier bytes.
+   */
+  byte[] getQualifierArray();
+
+  /**
+   * @return Array index of first qualifier byte
+   */
+  int getQualifierOffset();
+
+  /**
+   * @return Number of qualifier bytes. Must be < qualifierArray.length - offset.
+   */
+  int getQualifierLength();
+
+
+  //4) Timestamp
+
+  /**
+   * @return Long value representing time at which this cell was "Put" into the row. Typically
+   * represents the time of insertion, but can be any value from Long.MIN_VALUE to Long.MAX_VALUE.
+   */
+  long getTimestamp();
+
+
+  //5) Type
+
+  /**
+   * see {@link KeyValue.Type}
+   * @return The byte representation of the KeyValue.Type of this cell: one of Put, Delete, etc
+   */
+  byte getTypeByte();
+
+
+  //6) MvccVersion
+
+  /**
+   * Internal use only. A region-specific sequence ID given to each operation. It always exists for
+   * cells in the memstore but is not retained forever. It may survive several flushes, but
+   * generally becomes irrelevant after the cell's row is no longer involved in any operations that
+   * require strict consistency.
+   * @return mvccVersion (always >= 0 if exists), or 0 if it no longer exists
+   */
+  long getMvccVersion();
+
+
+  //7) Value
+
+  /**
+   * Contiguous raw bytes that may start at any index in the containing array. Max length is
+   * Integer.MAX_VALUE which is 2,147,483,647 bytes.
+   * @return The array containing the value bytes.
+   */
+  byte[] getValueArray();
+
+  /**
+   * @return Array index of first value byte
+   */
+  int getValueOffset();
+
+  /**
+   * @return Number of value bytes. Must be < valueArray.length - offset.
+   */
+  int getValueLength();
+
+}
diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java
new file mode 100644
index 00000000000..19d6b54b437
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hbase/cell/CellComparator.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.cell; + +import java.io.Serializable; +import java.util.Comparator; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hbase.Cell; + +import com.google.common.primitives.Longs; + +/** + * Compare two traditional HBase cells. + * + * Note: This comparator is not valid for -ROOT- and .META. tables. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class CellComparator implements Comparator, Serializable{ + private static final long serialVersionUID = -8760041766259623329L; + + @Override + public int compare(Cell a, Cell b) { + return compareStatic(a, b); + } + + + public static int compareStatic(Cell a, Cell b) { + //row + int c = Bytes.compareTo( + a.getRowArray(), a.getRowOffset(), a.getRowLength(), + b.getRowArray(), b.getRowOffset(), b.getRowLength()); + if (c != 0) return c; + + //family + c = Bytes.compareTo( + a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), + b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); + if (c != 0) return c; + + //qualifier + c = Bytes.compareTo( + a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), + b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); + if (c != 0) return c; + + //timestamp: later sorts first + c = -Longs.compare(a.getTimestamp(), b.getTimestamp()); + if (c != 0) return c; + + //type + c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte()); + if (c != 0) return c; + + //mvccVersion: later sorts first + return -Longs.compare(a.getMvccVersion(), b.getMvccVersion()); + } + + + /**************** equals ****************************/ + + public static boolean equals(Cell a, Cell b){ + if (!areKeyLengthsEqual(a, b)) { + return false; + } + //TODO compare byte[]'s in reverse since later bytes more likely to differ + return 0 == compareStatic(a, b); + } + + public static boolean equalsRow(Cell a, Cell b){ + if(!areRowLengthsEqual(a, b)){ + return false; + } + return 0 == Bytes.compareTo( + a.getRowArray(), a.getRowOffset(), a.getRowLength(), + b.getRowArray(), b.getRowOffset(), b.getRowLength()); + } + + + /********************* hashCode ************************/ + + /** + * Returns a hash code that is always the same for two Cells having a matching equals(..) result. + * Currently does not guard against nulls, but it could if necessary. 
+ */ + public static int hashCode(Cell cell){ + if (cell == null) {// return 0 for empty Cell + return 0; + } + + //pre-calculate the 3 hashes made of byte ranges + int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); + int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); + int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); + + //combine the 6 sub-hashes + int hash = 31 * rowHash + familyHash; + hash = 31 * hash + qualifierHash; + hash = 31 * hash + (int)cell.getTimestamp(); + hash = 31 * hash + cell.getTypeByte(); + hash = 31 * hash + (int)cell.getMvccVersion(); + return hash; + } + + + /******************** lengths *************************/ + + public static boolean areKeyLengthsEqual(Cell a, Cell b) { + return a.getRowLength() == b.getRowLength() + && a.getFamilyLength() == b.getFamilyLength() + && a.getQualifierLength() == b.getQualifierLength(); + } + + public static boolean areRowLengthsEqual(Cell a, Cell b) { + return a.getRowLength() == b.getRowLength(); + } + + + /***************** special cases ****************************/ + + /** + * special case for KeyValue.equals + */ + private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) { + //row + int c = Bytes.compareTo( + a.getRowArray(), a.getRowOffset(), a.getRowLength(), + b.getRowArray(), b.getRowOffset(), b.getRowLength()); + if (c != 0) return c; + + //family + c = Bytes.compareTo( + a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), + b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); + if (c != 0) return c; + + //qualifier + c = Bytes.compareTo( + a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), + b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); + if (c != 0) return c; + + //timestamp: later sorts first + c = -Longs.compare(a.getTimestamp(), b.getTimestamp()); + if (c != 0) return c; + + //type + c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte()); + return c; + } + + /** + * special case for KeyValue.equals + */ + public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){ + return 0 == compareStaticIgnoreMvccVersion(a, b); + } + +} diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java new file mode 100644 index 00000000000..fcaf27e7f5f --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/cell/CellOutputStream.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.cell; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hbase.Cell; + +/** + * Accepts a stream of Cells and adds them to its internal data structure. This can be used to build + * a block of cells during compactions and flushes, or to build a byte[] to send to the client. This + * could be backed by a List, but more efficient implementations will append results to a + * byte[] to eliminate overhead, and possibly encode the cells further. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface CellOutputStream { + + /** + * Implementation must copy the entire state of the Cell. If the appended Cell is modified + * immediately after the append method returns, the modifications must have absolutely no effect + * on the copy of the Cell that was added to the appender. For example, calling someList.add(cell) + * is not correct. + */ + void write(Cell cell); + + /** + * Let the implementation decide what to do. Usually means writing accumulated data into a byte[] + * that can then be read from the implementation to be sent to disk, put in the block cache, or + * sent over the network. + */ + void flush(); + +} diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellScannerPosition.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellScannerPosition.java new file mode 100644 index 00000000000..eeadf5fa863 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/cell/CellScannerPosition.java @@ -0,0 +1,68 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hbase.cell; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * An indicator of the state of the scanner after an operation such as nextCell() or positionAt(..). + * For example: + *

      + *
+ * <ul>
+ * <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
+ * it should load the next block.</li>
+ * <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.</li>
+ * <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
+ * next region.</li>
+ * </ul>
    + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum CellScannerPosition { + + /** + * getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first + * cell. + */ + BEFORE_FIRST, + + /** + * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), + * rather it is the nearest cell before the requested cell. + */ + BEFORE, + + /** + * getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by + * positionAt(..). + */ + AT, + + /** + * getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..), + * rather it is the nearest cell after the requested cell. + */ + AFTER, + + /** + * getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect. + */ + AFTER_LAST + +} diff --git a/hbase-common/src/main/java/org/apache/hbase/cell/CellTool.java b/hbase-common/src/main/java/org/apache/hbase/cell/CellTool.java new file mode 100644 index 00000000000..229ca3669d8 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hbase/cell/CellTool.java @@ -0,0 +1,118 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hbase.cell; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.ByteRange; +import org.apache.hbase.Cell; + +@InterfaceAudience.Private +@InterfaceStability.Evolving +public final class CellTool { + + /******************* ByteRange *******************************/ + + public static ByteRange fillRowRange(Cell cell, ByteRange range) { + return range.set(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); + } + + public static ByteRange fillFamilyRange(Cell cell, ByteRange range) { + return range.set(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); + } + + public static ByteRange fillQualifierRange(Cell cell, ByteRange range) { + return range.set(cell.getQualifierArray(), cell.getQualifierOffset(), + cell.getQualifierLength()); + } + + + /***************** get individual arrays for tests ************/ + + public static byte[] getRowArray(Cell cell){ + byte[] output = new byte[cell.getRowLength()]; + copyRowTo(cell, output, 0); + return output; + } + + public static byte[] getFamilyArray(Cell cell){ + byte[] output = new byte[cell.getFamilyLength()]; + copyFamilyTo(cell, output, 0); + return output; + } + + public static byte[] getQualifierArray(Cell cell){ + byte[] output = new byte[cell.getQualifierLength()]; + copyQualifierTo(cell, output, 0); + return output; + } + + public static byte[] getValueArray(Cell cell){ + byte[] output = new byte[cell.getValueLength()]; + copyValueTo(cell, output, 0); + return output; + } + + + /******************** copyTo **********************************/ + + public static int copyRowTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getRowArray(), cell.getRowOffset(), destination, destinationOffset, + cell.getRowLength()); + return destinationOffset + cell.getRowLength(); + } + + public static int copyFamilyTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getFamilyArray(), cell.getFamilyOffset(), destination, destinationOffset, + cell.getFamilyLength()); + return destinationOffset + cell.getFamilyLength(); + } + + public static int copyQualifierTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getQualifierArray(), cell.getQualifierOffset(), destination, + destinationOffset, cell.getQualifierLength()); + return destinationOffset + cell.getQualifierLength(); + } + + public static int copyValueTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getValueArray(), cell.getValueOffset(), destination, destinationOffset, + cell.getValueLength()); + return destinationOffset + cell.getValueLength(); + } + + + /********************* misc *************************************/ + + public static byte getRowByte(Cell cell, int index) { + return cell.getRowArray()[cell.getRowOffset() + index]; + } + + + /********************** KeyValue (move to KeyValueUtils) *********************/ + + public static ByteBuffer getValueBufferShallowCopy(Cell cell) { + ByteBuffer buffer = ByteBuffer.wrap(cell.getValueArray(), cell.getValueOffset(), + cell.getValueLength()); +// buffer.position(buffer.limit());//make it look as if value was appended + return buffer; + } + +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteRange.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteRange.java new file mode 100644 index 
00000000000..5b50cb8dcfb --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestByteRange.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.hbase.SmallTests; +import org.junit.experimental.categories.Category; + + +@Category(SmallTests.class) +public class TestByteRange extends TestCase { + + public void testEmpty(){ + Assert.assertTrue(ByteRange.isEmpty(null)); + ByteRange r = new ByteRange(); + Assert.assertTrue(ByteRange.isEmpty(r)); + Assert.assertFalse(ByteRange.notEmpty(r)); + Assert.assertTrue(r.isEmpty()); + Assert.assertFalse(r.notEmpty()); + Assert.assertNotNull(r.getBytes());//should be empty byte[], but could change this behavior + Assert.assertEquals(0, r.getBytes().length); + Assert.assertEquals(0, r.getOffset()); + Assert.assertEquals(0, r.getLength()); + Assert.assertTrue(Bytes.equals(new byte[0], r.deepCopyToNewArray())); + Assert.assertEquals(0, r.compareTo(new ByteRange(new byte[0], 0, 0))); + Assert.assertEquals(0, r.hashCode()); + } + + public void testBasics(){ + ByteRange r = new ByteRange(new byte[]{1, 3, 2}); + Assert.assertFalse(ByteRange.isEmpty(r)); + Assert.assertNotNull(r.getBytes());//should be empty byte[], but could change this behavior + Assert.assertEquals(3, r.getBytes().length); + Assert.assertEquals(0, r.getOffset()); + Assert.assertEquals(3, r.getLength()); + + //cloning (deep copying) + Assert.assertTrue(Bytes.equals(new byte[]{1, 3, 2}, r.deepCopyToNewArray())); + Assert.assertNotSame(r.getBytes(), r.deepCopyToNewArray()); + + //hash code + Assert.assertTrue(r.hashCode() > 0); + Assert.assertEquals(r.hashCode(), r.deepCopy().hashCode()); + + //copying to arrays + byte[] destination = new byte[]{-59};//junk + r.deepCopySubRangeTo(2, 1, destination, 0); + Assert.assertTrue(Bytes.equals(new byte[]{2}, destination)); + + //set length + r.setLength(1); + Assert.assertTrue(Bytes.equals(new byte[]{1}, r.deepCopyToNewArray())); + r.setLength(2);//verify we retained the 2nd byte, but dangerous in real code + Assert.assertTrue(Bytes.equals(new byte[]{1, 3}, r.deepCopyToNewArray())); + } + +} + diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java index 97ee8b76975..bc018a58bc7 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestBytes.java @@ -28,8 +28,10 @@ import java.util.Arrays; import java.util.Random; import junit.framework.TestCase; -import org.junit.experimental.categories.Category; + import 
org.apache.hadoop.hbase.SmallTests; +import org.junit.Assert; +import org.junit.experimental.categories.Category; @Category(SmallTests.class) @@ -376,7 +378,7 @@ public class TestBytes extends TestCase { assertEquals("World", Bytes.readStringFixedSize(dis, 18)); assertEquals("", Bytes.readStringFixedSize(dis, 9)); } - + public void testCopy() throws Exception { byte [] bytes = Bytes.toBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); byte [] copy = Bytes.copy(bytes); @@ -396,5 +398,32 @@ public class TestBytes extends TestCase { String bytes = Bytes.toStringBinary(Bytes.toBytes(2.17)); assertEquals(2.17, Bytes.toDouble(Bytes.toBytesBinary(bytes)), 0); } + + public void testUnsignedBinarySearch(){ + byte[] bytes = new byte[]{0,5,123,127,-128,-100,-1}; + Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)5), 1); + Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)127), 3); + Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-128), 4); + Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-100), 5); + Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-1), 6); + Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)2), -1-1); + Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-5), -6-1); + } + + public void testUnsignedIncrement(){ + byte[] a = Bytes.toBytes(0); + int a2 = Bytes.toInt(Bytes.unsignedCopyAndIncrement(a), 0); + Assert.assertTrue(a2==1); + + byte[] b = Bytes.toBytes(-1); + byte[] actual = Bytes.unsignedCopyAndIncrement(b); + Assert.assertNotSame(b, actual); + byte[] expected = new byte[]{1,0,0,0,0}; + Assert.assertArrayEquals(expected, actual); + + byte[] c = Bytes.toBytes(255);//should wrap to the next significant byte + int c2 = Bytes.toInt(Bytes.unsignedCopyAndIncrement(c), 0); + Assert.assertTrue(c2==256); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java index c97914391a5..7e71f94380b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java @@ -352,4 +352,21 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { } } + /** + * Asserts that there is at least the given amount of unfilled space + * remaining in the given buffer. + * @param out typically, the buffer we are writing to + * @param length the required space in the buffer + * @throws EncoderBufferTooSmallException If there are no enough bytes. 
+ */ + protected static void ensureSpace(ByteBuffer out, int length) + throws EncoderBufferTooSmallException { + if (out.position() + length > out.limit()) { + throw new EncoderBufferTooSmallException( + "Buffer position=" + out.position() + + ", buffer limit=" + out.limit() + + ", length to be written=" + length); + } + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java index eca0554d0f4..039187de2b3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java @@ -233,8 +233,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { // create KeyValue buffer and fill it prefix int keyOffset = buffer.position(); - ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength - + KeyValue.ROW_OFFSET); + ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET); buffer.putInt(keyLength); buffer.putInt(valueLength); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java index 6d1c5fb87ea..7c7c4b7660d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java @@ -230,8 +230,7 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder { } int commonLength = ByteBufferUtils.readCompressedInt(source); - ByteBufferUtils.ensureSpace(out, state.keyLength + state.valueLength + - KeyValue.ROW_OFFSET); + ensureSpace(out, state.keyLength + state.valueLength + KeyValue.ROW_OFFSET); int kvPos = out.position(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java index 3a2de8ee63a..925801a2321 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java @@ -126,8 +126,7 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder { int keyOffset; keyLength += commonLength; - ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength - + KeyValue.ROW_OFFSET); + ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET); buffer.putInt(keyLength); buffer.putInt(valueLength); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index 7a07b980428..8a665271cc7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -725,7 +725,7 @@ public class HFileBlock implements Cacheable { this.dataBlockEncoder = dataBlockEncoder != null ? 
dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE; defaultBlockEncodingCtx = - new HFileBlockDefaultEncodingContext(compressionAlgorithm, null); + new HFileBlockDefaultEncodingContext(compressionAlgorithm, null, DUMMY_HEADER); dataBlockEncodingCtx = this.dataBlockEncoder.newOnDiskDataBlockEncodingContext( compressionAlgorithm, DUMMY_HEADER); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java index 37fad21bc9a..dd2bbf7582d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.LargeTests; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; @@ -75,7 +76,7 @@ public class TestDataBlockEncoders { return encoder.newDataBlockEncodingContext(algo, encoding, HFileBlock.DUMMY_HEADER); } else { - return new HFileBlockDefaultEncodingContext(algo, encoding); + return new HFileBlockDefaultEncodingContext(algo, encoding, HFileBlock.DUMMY_HEADER); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java index 9d8b9acffe0..7d81a60df5e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java @@ -27,19 +27,17 @@ import java.util.Map; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MediumTests; -import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.CacheConfig; -import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.LruBlockCache; import org.apache.hadoop.hbase.regionserver.HRegion; -import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.MultiThreadedWriter; +import org.apache.hadoop.hbase.util.Strings; import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -59,9 +57,10 @@ public class TestEncodedSeekers { private static final byte[] CF_BYTES = Bytes.toBytes(CF_NAME); private static final int MAX_VERSIONS = 5; + private static final int BLOCK_SIZE = 64 * 1024; private static final int MIN_VALUE_SIZE = 30; private static final int MAX_VALUE_SIZE = 60; - private static final int NUM_ROWS = 1000; + private static final int NUM_ROWS = 1003; private static final int NUM_COLS_PER_ROW = 20; private static final int NUM_HFILES = 4; private static final int NUM_ROWS_PER_FLUSH = NUM_ROWS / NUM_HFILES; @@ -101,61 +100,73 @@ public class TestEncodedSeekers { .setMaxVersions(MAX_VERSIONS) 
.setDataBlockEncoding(encoding) .setEncodeOnDisk(encodeOnDisk) + .setBlocksize(BLOCK_SIZE) ); - LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator( - MIN_VALUE_SIZE, MAX_VALUE_SIZE); - // Write - for (int i = 0; i < NUM_ROWS; ++i) { + //write the data, but leave some in the memstore + doPuts(region); + + //verify correctness when memstore contains data + doGets(region); + + //verify correctness again after compacting + region.compactStores(); + doGets(region); + + + Map encodingCounts = cache.getEncodingCountsForTest(); + + // Ensure that compactions don't pollute the cache with unencoded blocks + // in case of in-cache-only encoding. + System.err.println("encodingCounts=" + encodingCounts); + assertEquals(1, encodingCounts.size()); + DataBlockEncoding encodingInCache = encodingCounts.keySet().iterator().next(); + assertEquals(encoding, encodingInCache); + assertTrue(encodingCounts.get(encodingInCache) > 0); + } + + + private void doPuts(HRegion region) throws IOException{ + LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE); + for (int i = 0; i < NUM_ROWS; ++i) { byte[] key = MultiThreadedWriter.longToByteArrayKey(i); for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { Put put = new Put(key); String colAsStr = String.valueOf(j); + byte[] col = Bytes.toBytes(colAsStr); byte[] value = dataGenerator.generateRandomSizeValue(i, colAsStr); put.add(CF_BYTES, Bytes.toBytes(colAsStr), value); + if(VERBOSE){ + KeyValue kvPut = new KeyValue(key, CF_BYTES, col, value); + System.err.println(Strings.padFront(i+"", ' ', 4)+" "+kvPut); + } region.put(put); } if (i % NUM_ROWS_PER_FLUSH == 0) { region.flushcache(); } } - - for (int doneCompaction = 0; doneCompaction <= 1; ++doneCompaction) { - // Read - for (int i = 0; i < NUM_ROWS; ++i) { - final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i); - for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { - if (VERBOSE) { - System.err.println("Reading row " + i + ", column " + j); - } - final String qualStr = String.valueOf(j); - final byte[] qualBytes = Bytes.toBytes(qualStr); - Get get = new Get(rowKey); - get.addColumn(CF_BYTES, qualBytes); - Result result = region.get(get, null); - assertEquals(1, result.size()); - assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr, - result.getValue(CF_BYTES, qualBytes))); + } + + + private void doGets(HRegion region) throws IOException{ + for (int i = 0; i < NUM_ROWS; ++i) { + final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i); + for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { + final String qualStr = String.valueOf(j); + if (VERBOSE) { + System.err.println("Reading row " + i + ", column " + j + " " + Bytes.toString(rowKey)+"/" + +qualStr); } - } - - if (doneCompaction == 0) { - // Compact, then read again at the next loop iteration. - region.compactStores(); + final byte[] qualBytes = Bytes.toBytes(qualStr); + Get get = new Get(rowKey); + get.addColumn(CF_BYTES, qualBytes); + Result result = region.get(get, null); + assertEquals(1, result.size()); + assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr, + result.getValue(CF_BYTES, qualBytes))); } } - - Map encodingCounts = - cache.getEncodingCountsForTest(); - - // Ensure that compactions don't pollute the cache with unencoded blocks - // in case of in-cache-only encoding. 
- System.err.println("encodingCounts=" + encodingCounts); - assertEquals(1, encodingCounts.size()); - DataBlockEncoding encodingInCache = - encodingCounts.keySet().iterator().next(); - assertEquals(encoding, encodingInCache); - assertTrue(encodingCounts.get(encodingInCache) > 0); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java index c24acc48736..b69761cad7f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java @@ -33,9 +33,8 @@ import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; -import org.apache.hadoop.hbase.io.encoding.RedundantKVGenerator; import org.apache.hadoop.hbase.util.ChecksumType; -import org.junit.After; +import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -112,9 +111,8 @@ public class TestHFileDataBlockEncoder { public void testEncodingWritePath() throws IOException { // usually we have just block without headers, but don't complicate that HFileBlock block = getSampleHFileBlock(); - HFileBlockEncodingContext context = - new HFileBlockDefaultEncodingContext( - Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk()); + HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext( + Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), HFileBlock.DUMMY_HEADER); blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(), includesMemstoreTS, context, block.getBlockType()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java index c31eec0a4fd..150e16274db 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestByteBufferUtils.java @@ -310,4 +310,16 @@ public class TestByteBufferUtils { throw new RuntimeException("Bug in test!", e); } } + + @Test + public void testToBytes(){ + ByteBuffer buffer = ByteBuffer.allocate(5); + buffer.put(new byte[]{0,1,2,3,4}); + assertEquals(5, buffer.position()); + assertEquals(5, buffer.limit()); + byte[] copy = ByteBufferUtils.toBytes(buffer, 2); + assertArrayEquals(new byte[]{2,3,4}, copy); + assertEquals(5, buffer.position()); + assertEquals(5, buffer.limit()); + } }
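
As a usage sketch (not part of the patch above), the following illustrates the ordering and equality semantics documented in Cell and CellComparator. It relies only on APIs introduced or touched by this patch (KeyValue, Bytes, CellComparator, CellTool); the example class name and its main() method are illustrative assumptions.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import org.apache.hbase.cell.CellTool;

public class CellSemanticsExample {
  public static void main(String[] args) {
    byte[] row = Bytes.toBytes("row1");
    byte[] family = Bytes.toBytes("f");
    byte[] qualifier = Bytes.toBytes("q");

    // Two cells that are identical except for their timestamps.
    Cell older = new KeyValue(row, family, qualifier, 1L, KeyValue.Type.Put, Bytes.toBytes("v1"));
    Cell newer = new KeyValue(row, family, qualifier, 2L, KeyValue.Type.Put, Bytes.toBytes("v2"));

    // The greater timestamp is treated as the lesser value, so the newer cell sorts first.
    System.out.println(CellComparator.compareStatic(newer, older) < 0); // true

    // equalsIgnoreMvccVersion compares row, family, qualifier, timestamp and type,
    // but ignores the mvccVersion (and the value).
    Cell copy = new KeyValue(row, family, qualifier, 2L, KeyValue.Type.Put, Bytes.toBytes("v2"));
    System.out.println(CellComparator.equalsIgnoreMvccVersion(newer, copy)); // true

    // CellTool copies a single field out of the cell's (possibly shared) backing array.
    System.out.println(Bytes.toString(CellTool.getValueArray(newer))); // v2
  }
}

Because comparison, equality, and hashing live in static CellComparator helpers, any Cell implementation, not only KeyValue, should be able to share a single definition of ordering.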
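
Similarly, a small sketch of the Strings helpers added above (isEmpty, repeat, padFront), as used for row-index padding in the reworked TestEncodedSeekers; the wrapper class here is hypothetical, and the expected outputs follow from the implementations shown in the patch.

import org.apache.hadoop.hbase.util.Strings;

public class StringsPaddingExample {
  public static void main(String[] args) {
    System.out.println(Strings.isEmpty(""));          // true: null or zero-length input
    System.out.println(Strings.repeat('-', 3));       // "---"
    System.out.println(Strings.padFront("7", ' ', 4)); // "   7": padded to a total length of 4
  }
}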