HBASE-7162 Prefix Compression - Trie data block encoding; hbase-common and hbase-server changes

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1410213 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2012-11-16 06:53:23 +00:00
parent d322efdeb6
commit c5b57b4558
33 changed files with 1992 additions and 146 deletions

View File

@@ -37,6 +37,8 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellComparator;
import com.google.common.primitives.Longs;
@@ -63,7 +65,7 @@ import com.google.common.primitives.Longs;
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class KeyValue implements Writable, HeapSize {
public class KeyValue implements Cell, Writable, HeapSize {
static final Log LOG = LogFactory.getLog(KeyValue.class);
// TODO: Group Key-only comparators and operations into a Key class, just
// for neatness' sake, if we can figure out what to call it.
@@ -261,12 +263,23 @@ public class KeyValue implements Writable, HeapSize {
/** Here be dragons **/
// used to achieve atomic operations in the memstore.
public long getMemstoreTS() {
@Override
public long getMvccVersion() {
return memstoreTS;
}
public void setMvccVersion(long mvccVersion){
this.memstoreTS = mvccVersion;
}
@Deprecated
public long getMemstoreTS() {
return getMvccVersion();
}
@Deprecated
public void setMemstoreTS(long memstoreTS) {
this.memstoreTS = memstoreTS;
setMvccVersion(memstoreTS);
}
// default value is 0, aka DNC
@@ -831,19 +844,21 @@ public class KeyValue implements Writable, HeapSize {
value, voffset, vlength);
}
// Needed doing 'contains' on List. Only compares the key portion, not the
// value.
/**
* Needed for doing 'contains' on a List. Only compares the key portion, not the value.
*
* For temporary backwards compatibility with the original KeyValue.equals method, we ignore the
* mvccVersion.
*/
@Override
public boolean equals(Object other) {
if (!(other instanceof KeyValue)) {
if (!(other instanceof Cell)) {
return false;
}
KeyValue kv = (KeyValue)other;
// Comparing bytes should be fine doing equals test. Shouldn't have to
// worry about special .META. comparators doing straight equals.
return Bytes.equals(getBuffer(), getKeyOffset(), getKeyLength(),
kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
return CellComparator.equalsIgnoreMvccVersion(this, (Cell)other);
}
@Override
public int hashCode() {
byte[] b = getBuffer();
int start = getOffset(), end = getOffset() + getLength();
@@ -864,6 +879,7 @@ public class KeyValue implements Writable, HeapSize {
* Clones a KeyValue. This creates a copy, re-allocating the buffer.
* @return Fully copied clone of this KeyValue
*/
@Override
public KeyValue clone() {
byte [] b = new byte[this.length];
System.arraycopy(this.bytes, this.offset, b, 0, this.length);
@@ -1041,9 +1057,18 @@ public class KeyValue implements Writable, HeapSize {
return keyLength;
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getValueArray() {
return bytes;
}
/**
* @return Value offset
*/
@Override
public int getValueOffset() {
return getKeyOffset() + getKeyLength();
}
@@ -1051,13 +1076,23 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Value length
*/
@Override
public int getValueLength() {
return Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT);
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getRowArray() {
return bytes;
}
/**
* @return Row offset
*/
@Override
public int getRowOffset() {
return getKeyOffset() + Bytes.SIZEOF_SHORT;
}
@@ -1065,13 +1100,23 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Row length
*/
@Override
public short getRowLength() {
return Bytes.toShort(this.bytes, getKeyOffset());
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getFamilyArray() {
return bytes;
}
/**
* @return Family offset
*/
@Override
public int getFamilyOffset() {
return getFamilyOffset(getRowLength());
}
@@ -1086,6 +1131,7 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Family length
*/
@Override
public byte getFamilyLength() {
return getFamilyLength(getFamilyOffset());
}
@@ -1097,9 +1143,18 @@ public class KeyValue implements Writable, HeapSize {
return this.bytes[foffset-1];
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getQualifierArray() {
return bytes;
}
/**
* @return Qualifier offset
*/
@Override
public int getQualifierOffset() {
return getQualifierOffset(getFamilyOffset());
}
@@ -1114,6 +1169,7 @@ public class KeyValue implements Writable, HeapSize {
/**
* @return Qualifier length
*/
@Override
public int getQualifierLength() {
return getQualifierLength(getRowLength(),getFamilyLength());
}
@@ -1273,6 +1329,7 @@ public class KeyValue implements Writable, HeapSize {
* @return Timestamp
*/
private long timestampCache = -1;
@Override
public long getTimestamp() {
if (timestampCache == -1) {
timestampCache = getTimestamp(getKeyLength());
@@ -1296,6 +1353,14 @@ public class KeyValue implements Writable, HeapSize {
return getType(getKeyLength());
}
/**
* @return KeyValue.TYPE byte representation
*/
@Override
public byte getTypeByte() {
return getType(getKeyLength());
}
/**
* @param keylength Pass if you have it to save on a int creation.
* @return Type of this KeyValue.
@@ -2564,13 +2629,23 @@ public class KeyValue implements Writable, HeapSize {
}
}
// HeapSize
/**
* HeapSize implementation
*
* We do not count the bytes in the rowCache because it should be empty for a KeyValue in the
* MemStore.
*/
@Override
public long heapSize() {
return ClassSize.align(ClassSize.OBJECT + (2 * ClassSize.REFERENCE) +
ClassSize.align(ClassSize.ARRAY) + ClassSize.align(length) +
(3 * Bytes.SIZEOF_INT) +
ClassSize.align(ClassSize.ARRAY) +
(2 * Bytes.SIZEOF_LONG));
int sum = 0;
sum += ClassSize.OBJECT;// the KeyValue object itself
sum += 2 * ClassSize.REFERENCE;// 2 * pointers to "bytes" and "rowCache" byte[]
sum += 2 * ClassSize.align(ClassSize.ARRAY);// 2 * "bytes" and "rowCache" byte[]
sum += ClassSize.align(length);// number of bytes of data in the "bytes" array
//ignore the data in the rowCache because it is cleared for inactive memstore KeyValues
sum += 3 * Bytes.SIZEOF_INT;// offset, length, keyLength
sum += 2 * Bytes.SIZEOF_LONG;// timestampCache, memstoreTS
return ClassSize.align(sum);
}
// this overload assumes that the length bytes have already been read,
@@ -2587,11 +2662,13 @@ public class KeyValue implements Writable, HeapSize {
}
// Writable
@Override
public void readFields(final DataInput in) throws IOException {
int length = in.readInt();
readFields(length, in);
}
@Override
public void write(final DataOutput out) throws IOException {
out.writeInt(this.length);
out.write(this.bytes, this.offset, this.length);
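With KeyValue now implementing Cell, calling code can address each field as an (array, offset, length) triple instead of copying byte[]'s. A minimal sketch of that accessor style (the values are hypothetical; package names follow this commit):

    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.util.Bytes;
    import org.apache.hbase.Cell;

    // Sketch: read fields through the Cell accessors without allocating per-field byte[]'s.
    Cell cell = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("fam"),
        Bytes.toBytes("qual"), 42L, KeyValue.Type.Put, Bytes.toBytes("value"));
    String row = Bytes.toString(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
    String value = Bytes.toString(cell.getValueArray(), cell.getValueOffset(),
        cell.getValueLength());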

View File

@@ -0,0 +1,168 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.hbase.util.Strings;
import com.google.common.collect.Lists;
@InterfaceAudience.Private
public class KeyValueTestUtil {
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
String value)
{
return create(row, family, qualifier, timestamp, KeyValue.Type.Put, value);
}
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
KeyValue.Type type,
String value)
{
return new KeyValue(
Bytes.toBytes(row),
Bytes.toBytes(family),
Bytes.toBytes(qualifier),
timestamp,
type,
Bytes.toBytes(value)
);
}
public static ByteBuffer toByteBufferAndRewind(final Iterable<? extends KeyValue> kvs,
boolean includeMemstoreTS) {
int totalBytes = KeyValueTool.totalLengthWithMvccVersion(kvs, includeMemstoreTS);
ByteBuffer bb = ByteBuffer.allocate(totalBytes);
for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
KeyValueTool.appendToByteBuffer(bb, kv, includeMemstoreTS);
}
bb.rewind();
return bb;
}
public static List<KeyValue> rewindThenToList(final ByteBuffer bb,
final boolean includesMemstoreTS) {
bb.rewind();
List<KeyValue> kvs = Lists.newArrayList();
KeyValue kv = null;
while (true) {
kv = KeyValueTool.nextShallowCopy(bb, includesMemstoreTS);
if (kv == null) {
break;
}
kvs.add(kv);
}
return kvs;
}
/********************* toString ************************************/
public static String toStringWithPadding(final Collection<? extends KeyValue> kvs,
final boolean includeMeta) {
int maxRowStringLength = 0;
int maxFamilyStringLength = 0;
int maxQualifierStringLength = 0;
int maxTimestampLength = 0;
for (KeyValue kv : kvs) {
maxRowStringLength = Math.max(maxRowStringLength, getRowString(kv).length());
maxFamilyStringLength = Math.max(maxFamilyStringLength, getFamilyString(kv).length());
maxQualifierStringLength = Math.max(maxQualifierStringLength, getQualifierString(kv)
.length());
maxTimestampLength = Math.max(maxTimestampLength, Long.valueOf(kv.getTimestamp()).toString()
.length());
}
StringBuilder sb = new StringBuilder();
for (KeyValue kv : kvs) {
if (sb.length() > 0) {
sb.append("\n");
}
String row = toStringWithPadding(kv, maxRowStringLength, maxFamilyStringLength,
maxQualifierStringLength, maxTimestampLength, includeMeta);
sb.append(row);
}
return sb.toString();
}
protected static String toStringWithPadding(final KeyValue kv, final int maxRowLength,
int maxFamilyLength, int maxQualifierLength, int maxTimestampLength, boolean includeMeta) {
String leadingLengths = "";
String familyLength = kv.getFamilyLength() + " ";
if (includeMeta) {
leadingLengths += Strings.padFront(kv.getKeyLength() + "", '0', 4);
leadingLengths += " ";
leadingLengths += Strings.padFront(kv.getValueLength() + "", '0', 4);
leadingLengths += " ";
leadingLengths += Strings.padFront(kv.getRowLength() + "", '0', 2);
leadingLengths += " ";
}
int spacesAfterRow = maxRowLength - getRowString(kv).length() + 2;
int spacesAfterFamily = maxFamilyLength - getFamilyString(kv).length() + 2;
int spacesAfterQualifier = maxQualifierLength - getQualifierString(kv).length() + 1;
int spacesAfterTimestamp = maxTimestampLength
- Long.valueOf(kv.getTimestamp()).toString().length() + 1;
return leadingLengths + getRowString(kv) + Strings.repeat(' ', spacesAfterRow)
+ familyLength + getFamilyString(kv) + Strings.repeat(' ', spacesAfterFamily)
+ getQualifierString(kv) + Strings.repeat(' ', spacesAfterQualifier)
+ getTimestampString(kv) + Strings.repeat(' ', spacesAfterTimestamp)
+ getTypeString(kv) + " " + getValueString(kv);
}
protected static String getRowString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getRow());
}
protected static String getFamilyString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getFamily());
}
protected static String getQualifierString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getQualifier());
}
protected static String getTimestampString(final KeyValue kv) {
return kv.getTimestamp() + "";
}
protected static String getTypeString(final KeyValue kv) {
return KeyValue.Type.codeToType(kv.getType()).toString();
}
protected static String getValueString(final KeyValue kv) {
return Bytes.toStringBinary(kv.getValue());
}
}
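A quick round-trip sketch using the helpers above (heap buffer, mvcc omitted; assumes the java.nio and guava imports already present in this file):

    List<KeyValue> kvs = Lists.newArrayList(
        KeyValueTestUtil.create("r1", "f", "q", 1L, "v1"),
        KeyValueTestUtil.create("r2", "f", "q", 1L, "v2"));
    ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false);
    List<KeyValue> decoded = KeyValueTestUtil.rewindThenToList(bb, false);
    // decoded.equals(kvs): KeyValue.equals now ignores mvccVersion, per the change above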

View File

@@ -0,0 +1,198 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hbase.Cell;
import org.apache.hbase.cell.CellTool;
/**
* static convenience methods for dealing with KeyValues and collections of KeyValues
*/
@InterfaceAudience.Private
public class KeyValueTool {
/**************** length *********************/
public static int length(final Cell cell) {
return (int)KeyValue.getKeyValueDataStructureSize(cell.getRowLength(), cell.getFamilyLength(),
cell.getQualifierLength(), cell.getValueLength());
}
protected static int keyLength(final Cell cell) {
return (int)KeyValue.getKeyDataStructureSize(cell.getRowLength(), cell.getFamilyLength(),
cell.getQualifierLength());
}
public static int lengthWithMvccVersion(final KeyValue kv, final boolean includeMvccVersion) {
int length = kv.getLength();
if (includeMvccVersion) {
length += WritableUtils.getVIntSize(kv.getMvccVersion());
}
return length;
}
public static int totalLengthWithMvccVersion(final Iterable<? extends KeyValue> kvs,
final boolean includeMvccVersion) {
int length = 0;
for (KeyValue kv : IterableUtils.nullSafe(kvs)) {
length += lengthWithMvccVersion(kv, includeMvccVersion);
}
return length;
}
/**************** copy key only *********************/
public static KeyValue copyToNewKeyValue(final Cell cell) {
KeyValue kvCell = new KeyValue(copyToNewByteArray(cell));
kvCell.setMvccVersion(cell.getMvccVersion());
return kvCell;
}
public static ByteBuffer copyKeyToNewByteBuffer(final Cell cell) {
byte[] bytes = new byte[keyLength(cell)];
appendKeyToByteArrayWithoutValue(cell, bytes, 0);
ByteBuffer buffer = ByteBuffer.wrap(bytes);
buffer.position(buffer.limit());//make it look as if each field were appended
return buffer;
}
public static byte[] copyToNewByteArray(final Cell cell) {
int v1Length = length(cell);
byte[] backingBytes = new byte[v1Length];
appendToByteArray(cell, backingBytes, 0);
return backingBytes;
}
protected static int appendKeyToByteArrayWithoutValue(final Cell cell, final byte[] output,
final int offset) {
int nextOffset = offset;
nextOffset = Bytes.putShort(output, nextOffset, cell.getRowLength());
nextOffset = CellTool.copyRowTo(cell, output, nextOffset);
nextOffset = Bytes.putByte(output, nextOffset, cell.getFamilyLength());
nextOffset = CellTool.copyFamilyTo(cell, output, nextOffset);
nextOffset = CellTool.copyQualifierTo(cell, output, nextOffset);
nextOffset = Bytes.putLong(output, nextOffset, cell.getTimestamp());
nextOffset = Bytes.putByte(output, nextOffset, cell.getTypeByte());
return nextOffset;
}
/**************** copy key and value *********************/
public static int appendToByteArray(final Cell cell, final byte[] output, final int offset) {
int pos = offset;
pos = Bytes.putInt(output, pos, keyLength(cell));
pos = Bytes.putInt(output, pos, cell.getValueLength());
pos = appendKeyToByteArrayWithoutValue(cell, output, pos);
CellTool.copyValueTo(cell, output, pos);
return pos + cell.getValueLength();
}
public static ByteBuffer copyToNewByteBuffer(final Cell cell) {
byte[] bytes = new byte[length(cell)];
appendToByteArray(cell, bytes, 0);
ByteBuffer buffer = ByteBuffer.wrap(bytes);
buffer.position(buffer.limit());//make it look as if each field were appended
return buffer;
}
public static void appendToByteBuffer(final ByteBuffer bb, final KeyValue kv,
final boolean includeMvccVersion) {
// keep pushing the limit out. assume enough capacity
bb.limit(bb.position() + kv.getLength());
bb.put(kv.getBuffer(), kv.getOffset(), kv.getLength());
if (includeMvccVersion) {
int numMvccVersionBytes = WritableUtils.getVIntSize(kv.getMvccVersion());
ByteBufferUtils.extendLimit(bb, numMvccVersionBytes);
ByteBufferUtils.writeVLong(bb, kv.getMvccVersion());
}
}
/**************** iterating *******************************/
/**
* Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
* position to the start of the next KeyValue. Does not allocate a new array or copy data.
*/
public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion) {
if (bb.isDirect()) {
throw new IllegalArgumentException("only supports heap buffers");
}
if (bb.remaining() < 1) {
return null;
}
int underlyingArrayOffset = bb.arrayOffset() + bb.position();
int keyLength = bb.getInt();
int valueLength = bb.getInt();
int kvLength = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keyLength + valueLength;
KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
ByteBufferUtils.skip(bb, keyLength + valueLength);
if (includesMvccVersion) {
long mvccVersion = ByteBufferUtils.readVLong(bb);
keyValue.setMvccVersion(mvccVersion);
}
return keyValue;
}
/*************** next/previous **********************************/
/**
* Append single byte 0x00 to the end of the input row key
*/
public static KeyValue createFirstKeyInNextRow(final Cell in){
byte[] nextRow = new byte[in.getRowLength() + 1];
System.arraycopy(in.getRowArray(), in.getRowOffset(), nextRow, 0, in.getRowLength());
nextRow[nextRow.length - 1] = 0;//maybe not necessary
return KeyValue.createFirstOnRow(nextRow);
}
/**
* Increment the row bytes and clear the other fields
*/
public static KeyValue createFirstKeyInIncrementedRow(final Cell in){
byte[] thisRow = new ByteRange(in.getRowArray(), in.getRowOffset(), in.getRowLength())
.deepCopyToNewArray();
byte[] nextRow = Bytes.unsignedCopyAndIncrement(thisRow);
return KeyValue.createFirstOnRow(nextRow);
}
/**
* Decrement the timestamp. For tests (currently wasteful).
*
* Remember that timestamps are sorted reverse chronologically.
* @param in the KeyValue whose timestamp to decrement
* @return the first possible KeyValue on the same row, family, and qualifier, with the
* decremented timestamp
*/
public static KeyValue previousKey(final KeyValue in) {
return KeyValue.createFirstOnRow(CellTool.getRowArray(in), CellTool.getFamilyArray(in),
CellTool.getQualifierArray(in), in.getTimestamp() - 1);
}
}
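A sketch of the append/shallow-copy cycle, including the trailing mvcc vint (single KeyValue for brevity; create(..) is the KeyValueTestUtil helper from this patch):

    KeyValue kv = KeyValueTestUtil.create("r", "f", "q", 1L, "v");
    kv.setMvccVersion(7L);
    ByteBuffer bb = ByteBuffer.allocate(KeyValueTool.lengthWithMvccVersion(kv, true));
    KeyValueTool.appendToByteBuffer(bb, kv, true);
    bb.rewind();
    // Points into bb.array() without copying, and advances bb past the KeyValue + vint:
    KeyValue shallow = KeyValueTool.nextShallowCopy(bb, true);
    // shallow.getMvccVersion() == 7L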

View File

@@ -162,7 +162,7 @@ public interface DataBlockEncoder {
*/
public ByteBuffer getValueShallowCopy();
/** @return key value at current position. */
/** @return key value at current position with position set to limit */
public ByteBuffer getKeyValueBuffer();
/**

View File

@@ -24,7 +24,6 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
/**
* A default implementation of {@link HFileBlockDecodingContext}. It assumes the

View File

@@ -25,10 +25,11 @@ import java.io.IOException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
import com.google.common.base.Preconditions;
/**
* A default implementation of {@link HFileBlockEncodingContext}. It will
* compress the data section as one continuous buffer.
@@ -85,28 +86,15 @@ public class HFileBlockDefaultEncodingContext implements
+ compressionAlgorithm, e);
}
}
if (headerBytes == null) {
dummyHeader = HFileBlock.DUMMY_HEADER;
} else {
dummyHeader = headerBytes;
}
}
/**
* @param compressionAlgorithm compression algorithm
* @param encoding encoding
*/
public HFileBlockDefaultEncodingContext(
Compression.Algorithm compressionAlgorithm,
DataBlockEncoding encoding) {
this(compressionAlgorithm, encoding, null);
dummyHeader = Preconditions.checkNotNull(headerBytes,
"Please pass HFileBlock.DUMMY_HEADER instead of null for param headerBytes");
}
/**
* prepare to start a new encoding.
* @throws IOException
*/
void prepareEncoding() throws IOException {
public void prepareEncoding() throws IOException {
encodedStream.reset();
dataOut.write(dummyHeader);
if (encodingAlgo != null

View File

@@ -17,6 +17,7 @@
package org.apache.hadoop.hbase.io.encoding;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.BlockType;
@@ -30,6 +31,11 @@ import org.apache.hadoop.hbase.io.hfile.BlockType;
*/
public interface HFileBlockEncodingContext {
/**
* @return OutputStream to which encoded data is written
*/
public OutputStream getOutputStreamForEncoder();
/**
* @return encoded and compressed bytes with header which are ready to write
* out to disk

View File

@@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.ArrayList;
import java.util.Arrays;
public class ArrayUtils {
public static int length(byte[] a) {
if (a == null) {
return 0;
}
return a.length;
}
public static int length(long[] a) {
if (a == null) {
return 0;
}
return a.length;
}
public static int length(Object[] a) {
if (a == null) {
return 0;
}
return a.length;
}
public static boolean isEmpty(byte[] a) {
if (a == null) {
return true;
}
if (a.length == 0) {
return true;
}
return false;
}
public static boolean isEmpty(long[] a) {
if (a == null) {
return true;
}
if (a.length == 0) {
return true;
}
return false;
}
public static boolean isEmpty(Object[] a) {
if (a == null) {
return true;
}
if (a.length == 0) {
return true;
}
return false;
}
public static long getFirst(long[] a) {
return a[0];
}
public static long getLast(long[] a) {
return a[a.length - 1];
}
public static int getTotalLengthOfArrays(Iterable<byte[]> arrays) {
if (arrays == null) {
return 0;
}
int length = 0;
for (byte[] bytes : arrays) {
length += length(bytes);
}
return length;
}
public static ArrayList<Long> toList(long[] array){
int length = length(array);
ArrayList<Long> list = new ArrayList<Long>(length);
for(int i=0; i < length; ++i){
list.add(array[i]);
}
return list;
}
public static byte[] growIfNecessary(byte[] array, int minLength, int numAdditionalBytes) {
if(array.length >= minLength){
return array;
}
return Arrays.copyOf(array, minLength + numAdditionalBytes);
}
public static int[] growIfNecessary(int[] array, int minLength, int numAdditionalInts) {
if(array.length >= minLength){
return array;
}
return Arrays.copyOf(array, minLength + numAdditionalInts);
}
public static long[] growIfNecessary(long[] array, int minLength, int numAdditionalLongs) {
if(array.length >= minLength){
return array;
}
return Arrays.copyOf(array, minLength + numAdditionalLongs);
}
}

View File

@@ -16,6 +16,7 @@
*/
package org.apache.hadoop.hbase.util;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.IOException;
@@ -25,8 +26,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.io.encoding.
EncoderBufferTooSmallException;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.WritableUtils;
/**
@@ -299,23 +299,6 @@ public final class ByteBufferUtils {
return tmpLength;
}
/**
* Asserts that there is at least the given amount of unfilled space
* remaining in the given buffer.
* @param out typically, the buffer we are writing to
* @param length the required space in the buffer
* @throws EncoderBufferTooSmallException If there is not enough space remaining.
*/
public static void ensureSpace(ByteBuffer out, int length)
throws EncoderBufferTooSmallException {
if (out.position() + length > out.limit()) {
throw new EncoderBufferTooSmallException(
"Buffer position=" + out.position() +
", buffer limit=" + out.limit() +
", length to be written=" + length);
}
}
/**
* Copy the given number of bytes from the given stream and put it at the
* current position of the given buffer, updating the position in the buffer.
@@ -335,6 +318,17 @@
}
}
}
/**
* Copy from the InputStream to a new heap ByteBuffer until the InputStream is exhausted.
*/
public static ByteBuffer drainInputStreamToBuffer(InputStream is) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
IOUtils.copyBytes(is, baos, 4096, true);
ByteBuffer buffer = ByteBuffer.wrap(baos.toByteArray());
buffer.rewind();
return buffer;
}
/**
* Copy from one buffer to another from given offset
@@ -440,4 +434,24 @@
buffer.position(buffer.position() + length);
}
public static void extendLimit(ByteBuffer buffer, int numBytes) {
buffer.limit(buffer.limit() + numBytes);
}
/**
* Copy the bytes from startPosition to limit into a new byte[] of the exact length and set the
* position back to its original value afterward (not thread safe).
* @param buffer copy from here
* @param startPosition put buffer.get(startPosition) into byte[0]
* @return a new byte[] containing the bytes in the specified range
*/
public static byte[] toBytes(ByteBuffer buffer, int startPosition) {
int originalPosition = buffer.position();
byte[] output = new byte[buffer.limit() - startPosition];
buffer.position(startPosition);
buffer.get(output);
buffer.position(originalPosition);
return output;
}
}
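A small sketch of the new toBytes helper's position handling:

    ByteBuffer bb = ByteBuffer.wrap(new byte[] { 10, 20, 30, 40 });
    bb.position(3);                                // simulate a partially consumed buffer
    byte[] tail = ByteBufferUtils.toBytes(bb, 1);  // copies indices 1..limit-1 -> {20, 30, 40}
    // bb.position() is restored to 3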

View File

@@ -0,0 +1,301 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
/**
* Lightweight, reusable class for specifying ranges of byte[]'s. CompareTo and equals methods are
* lexicographic, which is native to HBase.
* <p/>
* This class differs from ByteBuffer:
* <li/>On-heap bytes only
* <li/>Implements equals, hashCode, and compareTo so that it can be used in standard java
* Collections, similar to String.
* <li/>Does not maintain mark/position iterator state inside the class. Doing so leads to many bugs
* in complex applications.
* <li/>Allows the addition of simple core methods like this.copyTo(that, offset).
* <li/>Can be reused in tight loops like a major compaction which can save significant amounts of
* garbage.
* <li/>(Without reuse, we throw off garbage like this thing:
* http://www.youtube.com/watch?v=lkmBH-MjZF4)
* <p/>
* Mutable, and always evaluates equals, hashCode, and compareTo based on the current contents.
* <p/>
* Can contain convenience methods for comparing, printing, cloning, spawning new arrays, copying to
* other arrays, etc. Please place non-core methods into {@link ByteRangeTool}.
* <p/>
* We may consider converting this to an interface and creating separate implementations for a
* single byte[], a paged byte[] (growable byte[][]), a ByteBuffer, etc
*/
public class ByteRange implements Comparable<ByteRange> {
private static final int UNSET_HASH_VALUE = -1;
/********************** fields *****************************/
// Do not make these final, as the intention is to reuse objects of this class
/**
* The array containing the bytes in this range. Its length will be >= this range's length.
*/
private byte[] bytes;
/**
* The index of the first byte in this range. ByteRange.get(0) will return bytes[offset].
*/
private int offset;
/**
* The number of bytes in the range. Offset + length must be <= bytes.length
*/
private int length;
/**
* Variable for lazy-caching the hashCode of this range. Useful for frequently used ranges,
* long-lived ranges, or long ranges.
*/
private int hash = UNSET_HASH_VALUE;
/********************** construct ***********************/
public ByteRange() {
set(new byte[0]);//Could probably get away with a null array if the need arises.
}
public ByteRange(byte[] bytes) {
set(bytes);
}
public ByteRange(byte[] bytes, int offset, int length) {
set(bytes, offset, length);
}
/********************** write methods *************************/
public ByteRange clear() {
clearHashCache();
bytes = null;
offset = 0;
length = 0;
return this;
}
public ByteRange set(byte[] bytes) {
clearHashCache();
this.bytes = bytes;
this.offset = 0;
this.length = ArrayUtils.length(bytes);
return this;
}
public ByteRange set(byte[] bytes, int offset, int length) {
clearHashCache();
this.bytes = bytes;
this.offset = offset;
this.length = length;
return this;
}
public void setLength(int length) {
clearHashCache();
this.length = length;
}
/*********** read methods (add non-core methods to ByteRangeUtils) *************/
/**
* @param index zero-based index
* @return single byte at index
*/
public byte get(int index) {
return bytes[offset + index];
}
/**
* Instantiate a new byte[] with the exact range length (costing at least 24 bytes of array
* overhead plus the data itself) and copy the contents of this range into it.
* @return The newly cloned byte[].
*/
public byte[] deepCopyToNewArray() {
byte[] result = new byte[length];
System.arraycopy(bytes, offset, result, 0, length);
return result;
}
/**
* Create a new ByteRange with new backing byte[] and copy the state of this range into the new
* range. Copy the hash over if it is already calculated.
* @return a deep copy of this range backed by its own new byte[]
*/
public ByteRange deepCopy() {
ByteRange clone = new ByteRange(deepCopyToNewArray());
if (isHashCached()) {
clone.hash = hash;
}
return clone;
}
/**
* Wrapper for System.arraycopy. Copy the contents of this range into the provided array.
* @param destination Copy to this array
* @param destinationOffset First index in the destination array.
* @return void to avoid confusion between which ByteRange should be returned
*/
public void deepCopyTo(byte[] destination, int destinationOffset) {
System.arraycopy(bytes, offset, destination, destinationOffset, length);
}
/**
* Wrapper for System.arraycopy. Copy the contents of this range into the provided array.
* @param innerOffset Start copying from this index in this source ByteRange. First byte copied is
* bytes[offset + innerOffset]
* @param copyLength Copy this many bytes
* @param destination Copy to this array
* @param destinationOffset First index in the destination array.
* @return void to avoid confusion between which ByteRange should be returned
*/
public void deepCopySubRangeTo(int innerOffset, int copyLength, byte[] destination,
int destinationOffset) {
System.arraycopy(bytes, offset + innerOffset, destination, destinationOffset, copyLength);
}
/**
* Create a new ByteRange that points at this range's byte[]. The new range can have different
* values for offset and length, but modifying the shallowCopy will modify the bytes in this
* range's array. Pass over the hash code if it is already cached.
* @param innerOffset First byte of clone will be this.offset + innerOffset.
* @param copyLength Number of bytes in the clone.
* @return new ByteRange object referencing this range's byte[].
*/
public ByteRange shallowCopySubRange(int innerOffset, int copyLength) {
ByteRange clone = new ByteRange(bytes, offset + innerOffset, copyLength);
if (isHashCached()) {
clone.hash = hash;
}
return clone;
}
//TODO move to ByteRangeUtils because it is non-core method
public int numEqualPrefixBytes(ByteRange that, int thatInnerOffset) {
int maxCompares = Math.min(length, that.length - thatInnerOffset);
for (int i = 0; i < maxCompares; ++i) {
if (bytes[offset + i] != that.bytes[that.offset + thatInnerOffset + i]) {
return i;
}
}
return maxCompares;
}
public byte[] getBytes() {
return bytes;
}
public int getOffset() {
return offset;
}
public int getLength() {
return length;
}
public boolean isEmpty(){
return isEmpty(this);
}
public boolean notEmpty(){
return notEmpty(this);
}
/******************* static methods ************************/
public static boolean isEmpty(ByteRange range){
return range == null || range.length == 0;
}
public static boolean notEmpty(ByteRange range){
return range != null && range.length > 0;
}
/******************* standard methods *********************/
@Override
public boolean equals(Object thatObject) {
if (thatObject == null){
return false;
}
if (this == thatObject) {
return true;
}
if (hashCode() != thatObject.hashCode()) {
return false;
}
if (!(thatObject instanceof ByteRange)) {
return false;
}
ByteRange that = (ByteRange) thatObject;
return Bytes.equals(bytes, offset, length, that.bytes, that.offset, that.length);
}
@Override
public int hashCode() {
if (isHashCached()) {// hash is already calculated and cached
return hash;
}
if (this.isEmpty()) {// return 0 for empty ByteRange
hash = 0;
return hash;
}
int off = offset;
hash = 0;
for (int i = 0; i < length; i++) {
hash = 31 * hash + bytes[off++];
}
return hash;
}
private boolean isHashCached() {
return hash != UNSET_HASH_VALUE;
}
private void clearHashCache() {
hash = UNSET_HASH_VALUE;
}
/**
* Bitwise comparison of each byte in the array. Unsigned comparison, not paying attention to
* java's signed bytes.
*/
@Override
public int compareTo(ByteRange other) {
return Bytes.compareTo(bytes, offset, length, other.bytes, other.offset, other.length);
}
@Override
public String toString() {
return Bytes.toStringBinary(bytes, offset, length);
}
}
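A brief sketch of the reuse pattern the class comment describes, repointing one ByteRange at successive windows of a shared array instead of allocating per iteration:

    byte[] block = Bytes.toBytes("aaabbbccc");
    ByteRange range = new ByteRange();   // allocated once
    for (int i = 0; i < block.length; i += 3) {
      range.set(block, i, 3);            // repoint; also clears the cached hash
      System.out.println(range);         // toStringBinary of the current 3-byte window
    }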

View File

@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.ArrayList;
import java.util.Collection;
import com.google.common.collect.Lists;
/**
* Utility methods for {@link ByteRange}.
*/
public class ByteRangeTool {
public static ArrayList<byte[]> copyToNewArrays(Collection<ByteRange> ranges) {
if (ranges == null) {
return new ArrayList<byte[]>(0);
}
ArrayList<byte[]> arrays = Lists.newArrayListWithCapacity(ranges.size());
for (ByteRange range : ranges) {
arrays.add(range.deepCopyToNewArray());
}
return arrays;
}
public static ArrayList<ByteRange> fromArrays(Collection<byte[]> arrays) {
if (arrays == null) {
return new ArrayList<ByteRange>(0);
}
ArrayList<ByteRange> ranges = Lists.newArrayListWithCapacity(arrays.size());
for (byte[] array : arrays) {
ranges.add(new ByteRange(array));
}
return ranges;
}
}

View File

@@ -1647,7 +1647,7 @@ public class Bytes {
return toString(b, 0, n);
}
/**
* Copy the byte array given in parameter and return an instance
* of a new byte array with the same length and the same content.
@@ -1660,4 +1660,62 @@
System.arraycopy(bytes, 0, result, 0, bytes.length);
return result;
}
/**
* Search sorted array "a" for byte "key". I can't remember if I wrote this or copied it from
* somewhere. (mcorgan)
* @param a Array to search. Entries must be sorted and unique.
* @param fromIndex First index inclusive of "a" to include in the search.
* @param toIndex Last index exclusive of "a" to include in the search.
* @param key The byte to search for.
* @return The index of key if found. If not found, return -(index + 1), where negative indicates
* "not found" and the "index + 1" handles the "-0" case.
*/
public static int unsignedBinarySearch(byte[] a, int fromIndex, int toIndex, byte key) {
int unsignedKey = key & 0xff;
int low = fromIndex;
int high = toIndex - 1;
while (low <= high) {
int mid = (low + high) >>> 1;
int midVal = a[mid] & 0xff;
if (midVal < unsignedKey) {
low = mid + 1;
} else if (midVal > unsignedKey) {
high = mid - 1;
} else {
return mid; // key found
}
}
return -(low + 1); // key not found.
}
/**
* Treat the byte[] as an unsigned series of bytes, most significant bits first. Start by adding
* 1 to the rightmost bit/byte and carry over all overflows to the more significant bits/bytes.
*
* @param input The byte[] to increment.
* @return The incremented copy of "input". May be same length or 1 byte longer.
*/
public static byte[] unsignedCopyAndIncrement(final byte[] input) {
if (input == null) {// check before copying; copy(null) would NPE before a post-copy check ran
throw new IllegalArgumentException("cannot increment null array");
}
byte[] copy = copy(input);
for (int i = copy.length - 1; i >= 0; --i) {
if (copy[i] == -1) {// -1 is all 1-bits, which is the unsigned maximum
copy[i] = 0;
} else {
++copy[i];
return copy;
}
}
// we maxed out the array
byte[] out = new byte[copy.length + 1];
out[0] = 1;
System.arraycopy(copy, 0, out, 1, copy.length);
return out;
}
}
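Hedged examples of the two new helpers' edge cases (values chosen to show the unsigned treatment and the carry behavior):

    byte[] sorted = { 0, 5, (byte) 0xFF };  // sorted unsigned: 0x00 < 0x05 < 0xFF
    Bytes.unsignedBinarySearch(sorted, 0, 3, (byte) 0xFF);  // 2 (a signed compare would misplace 0xFF)
    Bytes.unsignedBinarySearch(sorted, 0, 3, (byte) 7);     // -3, i.e. -(insertionPoint + 1)

    Bytes.unsignedCopyAndIncrement(new byte[] { 1, (byte) 0xFF });  // {2, 0}: carry into next byte
    Bytes.unsignedCopyAndIncrement(new byte[] { (byte) 0xFF });     // {1, 0}: grows by one byte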

View File

@@ -0,0 +1,104 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
* Utility methods for dealing with Collections, including treating null collections as empty.
*/
public class CollectionUtils {
private static final List<Object> EMPTY_LIST = Collections.unmodifiableList(
new ArrayList<Object>(0));
@SuppressWarnings("unchecked")
public static <T> Collection<T> nullSafe(Collection<T> in) {
if (in == null) {
return (Collection<T>)EMPTY_LIST;
}
return in;
}
/************************ size ************************************/
public static <T> int nullSafeSize(Collection<T> collection) {
if (collection == null) {
return 0;
}
return collection.size();
}
public static <A, B> boolean nullSafeSameSize(Collection<A> a, Collection<B> b) {
return nullSafeSize(a) == nullSafeSize(b);
}
/*************************** empty ****************************************/
public static <T> boolean isEmpty(Collection<T> collection) {
return collection == null || collection.isEmpty();
}
public static <T> boolean notEmpty(Collection<T> collection) {
return !isEmpty(collection);
}
/************************ first/last **************************/
public static <T> T getFirst(Collection<T> collection) {
if (CollectionUtils.isEmpty(collection)) {
return null;
}
for (T t : collection) {
return t;
}
return null;
}
/**
* @param list any list
* @return -1 if list is empty, otherwise the max index
*/
public static int getLastIndex(List<?> list){
if(isEmpty(list)){
return -1;
}
return list.size() - 1;
}
/**
* @param list the list to check
* @param index the index in question
* @return true if it is the last index or if list is empty and -1 is passed for the index param
*/
public static boolean isLastIndex(List<?> list, int index){
return index == getLastIndex(list);
}
public static <T> T getLast(List<T> list) {
if (isEmpty(list)) {
return null;
}
return list.get(list.size() - 1);
}
}

View File

@@ -1,5 +1,4 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,37 +16,26 @@
* limitations under the License.
*/
package org.apache.hadoop.hbase;
package org.apache.hadoop.hbase.util;
import org.apache.hadoop.hbase.util.Bytes;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class KeyValueTestUtil {
/**
* Utility methods for Iterable including null-safe handlers.
*/
public class IterableUtils {
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
String value)
{
return create(row, family, qualifier, timestamp, KeyValue.Type.Put, value);
private static final List<Object> EMPTY_LIST = Collections
.unmodifiableList(new ArrayList<Object>(0));
@SuppressWarnings("unchecked")
public static <T> Iterable<T> nullSafe(Iterable<T> in) {
if (in == null) {
return (List<T>) EMPTY_LIST;
}
return in;
}
public static KeyValue create(
String row,
String family,
String qualifier,
long timestamp,
KeyValue.Type type,
String value)
{
return new KeyValue(
Bytes.toBytes(row),
Bytes.toBytes(family),
Bytes.toBytes(qualifier),
timestamp,
type,
Bytes.toBytes(value)
);
}
}

View File

@@ -76,4 +76,41 @@ public class Strings {
return null;
return dnPtr.endsWith(".") ? dnPtr.substring(0, dnPtr.length()-1) : dnPtr;
}
/**
* Null-safe length check.
* @param input the String to check
* @return true if null or length==0
*/
public static boolean isEmpty(String input) {
return input == null || input.length() == 0;
}
/**
* Push the input string to the right by appending a character before it, usually a space.
* @param input the string to pad
* @param padding the character to repeat to the left of the input string
* @param length the desired total length including the padding
* @return padding characters + input
*/
public static String padFront(String input, char padding, int length) {
if (input.length() > length) {
throw new IllegalArgumentException("input \"" + input + "\" longer than maxLength=" + length);
}
int numPaddingCharacters = length - input.length();
return repeat(padding, numPaddingCharacters) + input;
}
/**
* @param c repeat this character
* @param repeatFor the length of the output String
* @return c, repeated repeatFor times
*/
public static String repeat(char c, int repeatFor) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < repeatFor; ++i) {
sb.append(c);
}
return sb.toString();
}
}

View File

@@ -80,8 +80,10 @@ public class LoadTestKVGenerator {
*/
public byte[] generateRandomSizeValue(long key, String qual) {
String rowKey = md5PrefixedKey(key);
int dataSize = minValueSize + randomForValueSize.nextInt(
Math.abs(maxValueSize - minValueSize));
int dataSize = minValueSize;
if(minValueSize != maxValueSize){
dataSize = minValueSize + randomForValueSize.nextInt(Math.abs(maxValueSize - minValueSize));
}
return getValueForRowColumn(rowKey, qual, dataSize);
}

View File

@@ -14,7 +14,7 @@
* License for the specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hbase.io.encoding;
package org.apache.hadoop.hbase.util.test;
import java.nio.ByteBuffer;
import java.util.ArrayList;

View File

@@ -0,0 +1,173 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hbase.cell.CellTool;
/**
* The unit of storage in HBase consisting of the following fields:<br/>
* <pre>
* 1) row
* 2) column family
* 3) column qualifier
* 4) timestamp
* 5) type
* 6) MVCC version
* 7) value
* </pre>
* <p/>
* Uniqueness is determined by the combination of row, column family, column qualifier,
* timestamp, and type.
* <p/>
* The natural comparator will perform a bitwise comparison on row, column family, and column
* qualifier. Less intuitively, it will then treat the greater timestamp as the lesser value with
* the goal of sorting newer cells first.
* <p/>
* This interface does not include methods that allocate new byte[]'s such as those used in client
* or debugging code. These should be placed in a sub-interface or the {@link CellTool} class.
* <p/>
* Cell implements Comparable<Cell> which is only meaningful when comparing to other keys in the
* same table. It uses {@link CellComparator} which does not work on the -ROOT- and .META. tables.
* <p/>
* In the future, we may consider adding a boolean isOnHeap() method and a getValueBuffer() method
* that can be used to pass a value directly from an off-heap ByteBuffer to the network without
* copying into an on-heap byte[].
* <p/>
* Historic note: the original Cell implementation (KeyValue) requires that all fields be encoded as
* consecutive bytes in the same byte[], whereas this interface allows fields to reside in separate
* byte[]'s.
* <p/>
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface Cell {
//1) Row
/**
* Contiguous raw bytes that may start at any index in the containing array. Max length is
* Short.MAX_VALUE which is 32,767 bytes.
* @return The array containing the row bytes.
*/
byte[] getRowArray();
/**
* @return Array index of first row byte
*/
int getRowOffset();
/**
* @return Number of row bytes. Must be < rowArray.length - offset.
*/
short getRowLength();
//2) Family
/**
* Contiguous bytes composed of legal HDFS filename characters which may start at any index in the
* containing array. Max length is Byte.MAX_VALUE, which is 127 bytes.
* @return the array containing the family bytes.
*/
byte[] getFamilyArray();
/**
* @return Array index of first family byte
*/
int getFamilyOffset();
/**
* @return Number of family bytes. Must be < familyArray.length - offset.
*/
byte getFamilyLength();
//3) Qualifier
/**
* Contiguous raw bytes that may start at any index in the containing array. Max length is
* Short.MAX_VALUE which is 32,767 bytes.
* @return The array containing the qualifier bytes.
*/
byte[] getQualifierArray();
/**
* @return Array index of first qualifier byte
*/
int getQualifierOffset();
/**
* @return Number of qualifier bytes. Must be < qualifierArray.length - offset.
*/
int getQualifierLength();
//4) Timestamp
/**
* @return Long value representing time at which this cell was "Put" into the row. Typically
* represents the time of insertion, but can be any value from Long.MIN_VALUE to Long.MAX_VALUE.
*/
long getTimestamp();
//5) Type
/**
* See {@link KeyValue.Type}.
* @return The byte representation of the KeyValue.Type of this cell: one of Put, Delete, etc.
*/
byte getTypeByte();
//6) MvccVersion
/**
* Internal use only. A region-specific sequence ID given to each operation. It always exists for
* cells in the memstore but is not retained forever. It may survive several flushes, but
* generally becomes irrelevant after the cell's row is no longer involved in any operations that
* require strict consistency.
* @return mvccVersion (always >= 0 while it exists), or 0 if it no longer exists
*/
long getMvccVersion();
//7) Value
/**
* Contiguous raw bytes that may start at any index in the containing array. Max length is
* Integer.MAX_VALUE, which is 2,147,483,647 bytes.
* @return The array containing the value bytes.
*/
byte[] getValueArray();
/**
* @return Array index of first value byte
*/
int getValueOffset();
/**
* @return Number of value bytes. Must be < valueArray.length - offset.
*/
int getValueLength();
}
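Per the historic note above, fields need not share one backing array. A hypothetical implementation with one array per field, legal under this interface (Put type code and zero offsets are assumptions of this sketch):

    // Hypothetical Cell with each field in its own byte[].
    public class SeparateArraysCell implements Cell {
      private final byte[] row, family, qualifier, value;
      private final long timestamp;

      public SeparateArraysCell(byte[] row, byte[] family, byte[] qualifier, long timestamp,
          byte[] value) {
        this.row = row; this.family = family; this.qualifier = qualifier;
        this.timestamp = timestamp; this.value = value;
      }
      @Override public byte[] getRowArray() { return row; }
      @Override public int getRowOffset() { return 0; }
      @Override public short getRowLength() { return (short) row.length; }
      @Override public byte[] getFamilyArray() { return family; }
      @Override public int getFamilyOffset() { return 0; }
      @Override public byte getFamilyLength() { return (byte) family.length; }
      @Override public byte[] getQualifierArray() { return qualifier; }
      @Override public int getQualifierOffset() { return 0; }
      @Override public int getQualifierLength() { return qualifier.length; }
      @Override public long getTimestamp() { return timestamp; }
      @Override public byte getTypeByte() { return 4; }    // KeyValue.Type.Put code, assumed
      @Override public long getMvccVersion() { return 0; } // "no longer exists" -> 0
      @Override public byte[] getValueArray() { return value; }
      @Override public int getValueOffset() { return 0; }
      @Override public int getValueLength() { return value.length; }
    }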

View File

@@ -0,0 +1,178 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import java.io.Serializable;
import java.util.Comparator;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hbase.Cell;
import com.google.common.primitives.Longs;
/**
* Compare two traditional HBase cells.
*
* Note: This comparator is not valid for -ROOT- and .META. tables.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class CellComparator implements Comparator<Cell>, Serializable{
private static final long serialVersionUID = -8760041766259623329L;
@Override
public int compare(Cell a, Cell b) {
return compareStatic(a, b);
}
public static int compareStatic(Cell a, Cell b) {
//row
int c = Bytes.compareTo(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
if (c != 0) return c;
//family
c = Bytes.compareTo(
a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
if (c != 0) return c;
//qualifier
c = Bytes.compareTo(
a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
if (c != 0) return c;
//timestamp: later sorts first
c = -Longs.compare(a.getTimestamp(), b.getTimestamp());
if (c != 0) return c;
//type
c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte());
if (c != 0) return c;
//mvccVersion: later sorts first
return -Longs.compare(a.getMvccVersion(), b.getMvccVersion());
}
/**************** equals ****************************/
public static boolean equals(Cell a, Cell b){
if (!areKeyLengthsEqual(a, b)) {
return false;
}
//TODO compare byte[]'s in reverse since later bytes more likely to differ
return 0 == compareStatic(a, b);
}
public static boolean equalsRow(Cell a, Cell b){
if(!areRowLengthsEqual(a, b)){
return false;
}
return 0 == Bytes.compareTo(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
}
/********************* hashCode ************************/
/**
* Returns a hash code that is always the same for two Cells having a matching equals(..) result.
* A null Cell hashes to 0.
*/
public static int hashCode(Cell cell){
if (cell == null) {// return 0 for empty Cell
return 0;
}
//pre-calculate the 3 hashes made of byte ranges
int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength());
//combine the 6 sub-hashes
int hash = 31 * rowHash + familyHash;
hash = 31 * hash + qualifierHash;
hash = 31 * hash + (int)cell.getTimestamp();
hash = 31 * hash + cell.getTypeByte();
hash = 31 * hash + (int)cell.getMvccVersion();
return hash;
}
/******************** lengths *************************/
public static boolean areKeyLengthsEqual(Cell a, Cell b) {
return a.getRowLength() == b.getRowLength()
&& a.getFamilyLength() == b.getFamilyLength()
&& a.getQualifierLength() == b.getQualifierLength();
}
public static boolean areRowLengthsEqual(Cell a, Cell b) {
return a.getRowLength() == b.getRowLength();
}
/***************** special cases ****************************/
/**
* special case for KeyValue.equals
*/
private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) {
//row
int c = Bytes.compareTo(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
if (c != 0) return c;
//family
c = Bytes.compareTo(
a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
if (c != 0) return c;
//qualifier
c = Bytes.compareTo(
a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
if (c != 0) return c;
//timestamp: later sorts first
c = -Longs.compare(a.getTimestamp(), b.getTimestamp());
if (c != 0) return c;
//type
c = (0xff & a.getTypeByte()) - (0xff & b.getTypeByte());
return c;
}
/**
* special case for KeyValue.equals
*/
public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){
return 0 == compareStaticIgnoreMvccVersion(a, b);
}
}
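A quick sketch of the "later sorts first" timestamp rule above (row, family, and qualifier are equal here, so the comparison falls through to the timestamps; create(..) is the KeyValueTestUtil helper from this patch):

    Cell older = KeyValueTestUtil.create("row", "fam", "q", 1L, "v1");
    Cell newer = KeyValueTestUtil.create("row", "fam", "q", 2L, "v2");
    // CellComparator.compareStatic(newer, older) < 0: the newer cell sorts first
    List<Cell> cells = Lists.newArrayList(older, newer);
    Collections.sort(cells, new CellComparator()); // cells.get(0) is now the newer cell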

View File

@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hbase.Cell;
/**
* Accepts a stream of Cells and adds them to its internal data structure. This can be used to build
* a block of cells during compactions and flushes, or to build a byte[] to send to the client. This
* could be backed by a List<KeyValue>, but more efficient implementations will append results to a
* byte[] to eliminate overhead, and possibly encode the cells further.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface CellOutputStream {
/**
* Implementations must copy the entire state of the Cell. If the appended Cell is modified
* immediately after the append method returns, the modifications must have absolutely no effect
* on the copy of the Cell that was added to the appender. For example, calling someList.add(cell)
* is not correct.
*/
void write(Cell cell);
/**
* Let the implementation decide what to do. Usually means writing accumulated data into a byte[]
* that can then be read from the implementation to be sent to disk, put in the block cache, or
* sent over the network.
*/
void flush();
}
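For illustration, a minimal list-backed implementation honoring the copy contract might look like the following sketch (the KeyValueTool.copyToNewKeyValue deep-copy helper is assumed here and is not shown in this excerpt):

import java.util.ArrayList;
import java.util.List;

public class ListCellOutputStream implements CellOutputStream {
  private final List<Cell> cells = new ArrayList<Cell>();

  @Override
  public void write(Cell cell) {
    // deep-copy so that later mutation of the caller's cell cannot affect us
    cells.add(KeyValueTool.copyToNewKeyValue(cell));
  }

  @Override
  public void flush() {
    // nothing buffered beyond the list itself; a byte[]-backed implementation
    // would encode and emit its accumulated cells at this point
  }
}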

View File

@ -0,0 +1,68 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* An indicator of the state of the scanner after an operation such as nextCell() or positionAt(..).
* For example:
* <ul>
* <li>In a DataBlockScanner, the AFTER_LAST position indicates to the parent StoreFileScanner that
* it should load the next block.</li>
* <li>In a StoreFileScanner, the AFTER_LAST position indicates that the file has been exhausted.</li>
* <li>In a RegionScanner, the AFTER_LAST position indicates that the scanner should move to the
* next region.</li>
* </ul>
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public enum CellScannerPosition {
/**
* getCurrentCell() will NOT return a valid cell. Calling nextCell() will advance to the first
* cell.
*/
BEFORE_FIRST,
/**
* getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..);
* rather, it is the nearest cell before the requested cell.
*/
BEFORE,
/**
* getCurrentCell() will return a valid cell, and it is exactly the cell that was requested by
* positionAt(..).
*/
AT,
/**
* getCurrentCell() will return a valid cell, but it is not the cell requested by positionAt(..);
* rather, it is the nearest cell after the requested cell.
*/
AFTER,
/**
* getCurrentCell() will NOT return a valid cell. Calling nextCell() will have no effect.
*/
AFTER_LAST
}

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hbase.cell;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hbase.Cell;
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class CellTool {
/******************* ByteRange *******************************/
public static ByteRange fillRowRange(Cell cell, ByteRange range) {
return range.set(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
}
public static ByteRange fillFamilyRange(Cell cell, ByteRange range) {
return range.set(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
}
public static ByteRange fillQualifierRange(Cell cell, ByteRange range) {
return range.set(cell.getQualifierArray(), cell.getQualifierOffset(),
cell.getQualifierLength());
}
/***************** get individual arrays for tests ************/
public static byte[] getRowArray(Cell cell){
byte[] output = new byte[cell.getRowLength()];
copyRowTo(cell, output, 0);
return output;
}
public static byte[] getFamilyArray(Cell cell){
byte[] output = new byte[cell.getFamilyLength()];
copyFamilyTo(cell, output, 0);
return output;
}
public static byte[] getQualifierArray(Cell cell){
byte[] output = new byte[cell.getQualifierLength()];
copyQualifierTo(cell, output, 0);
return output;
}
public static byte[] getValueArray(Cell cell){
byte[] output = new byte[cell.getValueLength()];
copyValueTo(cell, output, 0);
return output;
}
/******************** copyTo **********************************/
public static int copyRowTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getRowArray(), cell.getRowOffset(), destination, destinationOffset,
cell.getRowLength());
return destinationOffset + cell.getRowLength();
}
public static int copyFamilyTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getFamilyArray(), cell.getFamilyOffset(), destination, destinationOffset,
cell.getFamilyLength());
return destinationOffset + cell.getFamilyLength();
}
public static int copyQualifierTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getQualifierArray(), cell.getQualifierOffset(), destination,
destinationOffset, cell.getQualifierLength());
return destinationOffset + cell.getQualifierLength();
}
public static int copyValueTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getValueArray(), cell.getValueOffset(), destination, destinationOffset,
cell.getValueLength());
return destinationOffset + cell.getValueLength();
}
/********************* misc *************************************/
public static byte getRowByte(Cell cell, int index) {
return cell.getRowArray()[cell.getRowOffset() + index];
}
/********************** KeyValue (move to KeyValueUtils) *********************/
public static ByteBuffer getValueBufferShallowCopy(Cell cell) {
ByteBuffer buffer = ByteBuffer.wrap(cell.getValueArray(), cell.getValueOffset(),
cell.getValueLength());
// buffer.position(buffer.limit());//make it look as if value was appended
return buffer;
}
}
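The copyTo helpers return the next write offset, so they chain naturally. For example, flattening a cell's key fields into one contiguous array (a usage sketch built only from the methods above):

byte[] flat = new byte[cell.getRowLength() + cell.getFamilyLength()
    + cell.getQualifierLength()];
int pos = CellTool.copyRowTo(cell, flat, 0);
pos = CellTool.copyFamilyTo(cell, flat, pos);
pos = CellTool.copyQualifierTo(cell, flat, pos);  // pos now == flat.length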

View File

@ -0,0 +1,75 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.hadoop.hbase.SmallTests;
import org.junit.experimental.categories.Category;
@Category(SmallTests.class)
public class TestByteRange extends TestCase {
public void testEmpty(){
Assert.assertTrue(ByteRange.isEmpty(null));
ByteRange r = new ByteRange();
Assert.assertTrue(ByteRange.isEmpty(r));
Assert.assertFalse(ByteRange.notEmpty(r));
Assert.assertTrue(r.isEmpty());
Assert.assertFalse(r.notEmpty());
Assert.assertNotNull(r.getBytes());//should be empty byte[], but could change this behavior
Assert.assertEquals(0, r.getBytes().length);
Assert.assertEquals(0, r.getOffset());
Assert.assertEquals(0, r.getLength());
Assert.assertTrue(Bytes.equals(new byte[0], r.deepCopyToNewArray()));
Assert.assertEquals(0, r.compareTo(new ByteRange(new byte[0], 0, 0)));
Assert.assertEquals(0, r.hashCode());
}
public void testBasics(){
ByteRange r = new ByteRange(new byte[]{1, 3, 2});
Assert.assertFalse(ByteRange.isEmpty(r));
Assert.assertNotNull(r.getBytes());//should be the wrapped array {1, 3, 2}
Assert.assertEquals(3, r.getBytes().length);
Assert.assertEquals(0, r.getOffset());
Assert.assertEquals(3, r.getLength());
//cloning (deep copying)
Assert.assertTrue(Bytes.equals(new byte[]{1, 3, 2}, r.deepCopyToNewArray()));
Assert.assertNotSame(r.getBytes(), r.deepCopyToNewArray());
//hash code
Assert.assertTrue(r.hashCode() > 0);
Assert.assertEquals(r.hashCode(), r.deepCopy().hashCode());
//copying to arrays
byte[] destination = new byte[]{-59};//junk
r.deepCopySubRangeTo(2, 1, destination, 0);
Assert.assertTrue(Bytes.equals(new byte[]{2}, destination));
//set length
r.setLength(1);
Assert.assertTrue(Bytes.equals(new byte[]{1}, r.deepCopyToNewArray()));
r.setLength(2);//verify we retained the 2nd byte, but dangerous in real code
Assert.assertTrue(Bytes.equals(new byte[]{1, 3}, r.deepCopyToNewArray()));
}
}

View File

@ -28,8 +28,10 @@ import java.util.Arrays;
import java.util.Random;
import junit.framework.TestCase;
import org.junit.experimental.categories.Category;
import org.apache.hadoop.hbase.SmallTests;
import org.junit.Assert;
import org.junit.experimental.categories.Category;
@Category(SmallTests.class)
@ -376,7 +378,7 @@ public class TestBytes extends TestCase {
assertEquals("World", Bytes.readStringFixedSize(dis, 18));
assertEquals("", Bytes.readStringFixedSize(dis, 9));
}
public void testCopy() throws Exception {
byte [] bytes = Bytes.toBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
byte [] copy = Bytes.copy(bytes);
@ -396,5 +398,32 @@ public class TestBytes extends TestCase {
String bytes = Bytes.toStringBinary(Bytes.toBytes(2.17));
assertEquals(2.17, Bytes.toDouble(Bytes.toBytesBinary(bytes)), 0);
}
public void testUnsignedBinarySearch(){
byte[] bytes = new byte[]{0,5,123,127,-128,-100,-1};
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)5), 1);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)127), 3);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-128), 4);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-100), 5);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-1), 6);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)2), -1-1);
Assert.assertEquals(Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)-5), -6-1);
}
public void testUnsignedIncrement(){
byte[] a = Bytes.toBytes(0);
int a2 = Bytes.toInt(Bytes.unsignedCopyAndIncrement(a), 0);
Assert.assertTrue(a2==1);
byte[] b = Bytes.toBytes(-1);
byte[] actual = Bytes.unsignedCopyAndIncrement(b);
Assert.assertNotSame(b, actual);
byte[] expected = new byte[]{1,0,0,0,0};
Assert.assertArrayEquals(expected, actual);
byte[] c = Bytes.toBytes(255);//should wrap to the next significant byte
int c2 = Bytes.toInt(Bytes.unsignedCopyAndIncrement(c), 0);
Assert.assertTrue(c2==256);
}
}
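A note on the expected values in testUnsignedBinarySearch: like Arrays.binarySearch, the method appears to signal a missing key by returning -(insertionPoint)-1, which is what the negative expectations above encode. For instance:

// (byte)2 would be inserted at index 1, so the result is -1-1 == -2
int idx = Bytes.unsignedBinarySearch(bytes, 0, bytes.length, (byte)2);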

View File

@ -352,4 +352,21 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
}
}
/**
* Asserts that there is at least the given amount of unfilled space
* remaining in the given buffer.
* @param out the buffer we are writing to (typically the encode destination)
* @param length the required space in the buffer
* @throws EncoderBufferTooSmallException if there is not enough space remaining.
*/
protected static void ensureSpace(ByteBuffer out, int length)
throws EncoderBufferTooSmallException {
if (out.position() + length > out.limit()) {
throw new EncoderBufferTooSmallException(
"Buffer position=" + out.position() +
", buffer limit=" + out.limit() +
", length to be written=" + length);
}
}
}
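The call pattern in the encoder subclasses below is to reserve the full key/value footprint, plus the KeyValue header (KeyValue.ROW_OFFSET bytes), before writing anything:

ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET);
buffer.putInt(keyLength);   // safe: space was verified above
buffer.putInt(valueLength);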

View File

@ -233,8 +233,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
// create the KeyValue buffer and fill in its prefix
int keyOffset = buffer.position();
ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength
+ KeyValue.ROW_OFFSET);
ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET);
buffer.putInt(keyLength);
buffer.putInt(valueLength);

View File

@ -230,8 +230,7 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
}
int commonLength = ByteBufferUtils.readCompressedInt(source);
ByteBufferUtils.ensureSpace(out, state.keyLength + state.valueLength +
KeyValue.ROW_OFFSET);
ensureSpace(out, state.keyLength + state.valueLength + KeyValue.ROW_OFFSET);
int kvPos = out.position();

View File

@ -126,8 +126,7 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
int keyOffset;
keyLength += commonLength;
ByteBufferUtils.ensureSpace(buffer, keyLength + valueLength
+ KeyValue.ROW_OFFSET);
ensureSpace(buffer, keyLength + valueLength + KeyValue.ROW_OFFSET);
buffer.putInt(keyLength);
buffer.putInt(valueLength);

View File

@ -725,7 +725,7 @@ public class HFileBlock implements Cacheable {
this.dataBlockEncoder = dataBlockEncoder != null
? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
defaultBlockEncodingCtx =
new HFileBlockDefaultEncodingContext(compressionAlgorithm, null);
new HFileBlockDefaultEncodingContext(compressionAlgorithm, null, DUMMY_HEADER);
dataBlockEncodingCtx =
this.dataBlockEncoder.newOnDiskDataBlockEncodingContext(
compressionAlgorithm, DUMMY_HEADER);

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
@ -75,7 +76,7 @@ public class TestDataBlockEncoders {
return encoder.newDataBlockEncodingContext(algo, encoding,
HFileBlock.DUMMY_HEADER);
} else {
return new HFileBlockDefaultEncodingContext(algo, encoding);
return new HFileBlockDefaultEncodingContext(algo, encoding, HFileBlock.DUMMY_HEADER);
}
}

View File

@ -27,19 +27,17 @@ import java.util.Map;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.MultiThreadedWriter;
import org.apache.hadoop.hbase.util.Strings;
import org.apache.hadoop.hbase.util.test.LoadTestKVGenerator;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -59,9 +57,10 @@ public class TestEncodedSeekers {
private static final byte[] CF_BYTES = Bytes.toBytes(CF_NAME);
private static final int MAX_VERSIONS = 5;
private static final int BLOCK_SIZE = 64 * 1024;
private static final int MIN_VALUE_SIZE = 30;
private static final int MAX_VALUE_SIZE = 60;
private static final int NUM_ROWS = 1000;
private static final int NUM_ROWS = 1003;
private static final int NUM_COLS_PER_ROW = 20;
private static final int NUM_HFILES = 4;
private static final int NUM_ROWS_PER_FLUSH = NUM_ROWS / NUM_HFILES;
@ -101,61 +100,73 @@ public class TestEncodedSeekers {
.setMaxVersions(MAX_VERSIONS)
.setDataBlockEncoding(encoding)
.setEncodeOnDisk(encodeOnDisk)
.setBlocksize(BLOCK_SIZE)
);
LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(
MIN_VALUE_SIZE, MAX_VALUE_SIZE);
// Write
for (int i = 0; i < NUM_ROWS; ++i) {
//write the data, but leave some in the memstore
doPuts(region);
//verify correctness when memstore contains data
doGets(region);
//verify correctness again after compacting
region.compactStores();
doGets(region);
Map<DataBlockEncoding, Integer> encodingCounts = cache.getEncodingCountsForTest();
// Ensure that compactions don't pollute the cache with unencoded blocks
// in case of in-cache-only encoding.
System.err.println("encodingCounts=" + encodingCounts);
assertEquals(1, encodingCounts.size());
DataBlockEncoding encodingInCache = encodingCounts.keySet().iterator().next();
assertEquals(encoding, encodingInCache);
assertTrue(encodingCounts.get(encodingInCache) > 0);
}
private void doPuts(HRegion region) throws IOException{
LoadTestKVGenerator dataGenerator = new LoadTestKVGenerator(MIN_VALUE_SIZE, MAX_VALUE_SIZE);
for (int i = 0; i < NUM_ROWS; ++i) {
byte[] key = MultiThreadedWriter.longToByteArrayKey(i);
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
Put put = new Put(key);
String colAsStr = String.valueOf(j);
byte[] col = Bytes.toBytes(colAsStr);
byte[] value = dataGenerator.generateRandomSizeValue(i, colAsStr);
put.add(CF_BYTES, Bytes.toBytes(colAsStr), value);
if(VERBOSE){
KeyValue kvPut = new KeyValue(key, CF_BYTES, col, value);
System.err.println(Strings.padFront(i+"", ' ', 4)+" "+kvPut);
}
region.put(put);
}
if (i % NUM_ROWS_PER_FLUSH == 0) {
region.flushcache();
}
}
for (int doneCompaction = 0; doneCompaction <= 1; ++doneCompaction) {
// Read
for (int i = 0; i < NUM_ROWS; ++i) {
final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i);
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
if (VERBOSE) {
System.err.println("Reading row " + i + ", column " + j);
}
final String qualStr = String.valueOf(j);
final byte[] qualBytes = Bytes.toBytes(qualStr);
Get get = new Get(rowKey);
get.addColumn(CF_BYTES, qualBytes);
Result result = region.get(get, null);
assertEquals(1, result.size());
assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr,
result.getValue(CF_BYTES, qualBytes)));
}
private void doGets(HRegion region) throws IOException{
for (int i = 0; i < NUM_ROWS; ++i) {
final byte[] rowKey = MultiThreadedWriter.longToByteArrayKey(i);
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
final String qualStr = String.valueOf(j);
if (VERBOSE) {
System.err.println("Reading row " + i + ", column " + j + " " + Bytes.toString(rowKey)+"/"
+qualStr);
}
}
if (doneCompaction == 0) {
// Compact, then read again at the next loop iteration.
region.compactStores();
final byte[] qualBytes = Bytes.toBytes(qualStr);
Get get = new Get(rowKey);
get.addColumn(CF_BYTES, qualBytes);
Result result = region.get(get, null);
assertEquals(1, result.size());
assertTrue(LoadTestKVGenerator.verify(Bytes.toString(rowKey), qualStr,
result.getValue(CF_BYTES, qualBytes)));
}
}
Map<DataBlockEncoding, Integer> encodingCounts =
cache.getEncodingCountsForTest();
// Ensure that compactions don't pollute the cache with unencoded blocks
// in case of in-cache-only encoding.
System.err.println("encodingCounts=" + encodingCounts);
assertEquals(1, encodingCounts.size());
DataBlockEncoding encodingInCache =
encodingCounts.keySet().iterator().next();
assertEquals(encoding, encodingInCache);
assertTrue(encodingCounts.get(encodingInCache) > 0);
}
}

View File

@ -33,9 +33,8 @@ import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.encoding.RedundantKVGenerator;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.junit.After;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -112,9 +111,8 @@ public class TestHFileDataBlockEncoder {
public void testEncodingWritePath() throws IOException {
// usually we have just the block without headers, but let's not complicate the test
HFileBlock block = getSampleHFileBlock();
HFileBlockEncodingContext context =
new HFileBlockDefaultEncodingContext(
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk());
HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext(
Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), HFileBlock.DUMMY_HEADER);
blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(),
includesMemstoreTS, context, block.getBlockType());

View File

@ -310,4 +310,16 @@ public class TestByteBufferUtils {
throw new RuntimeException("Bug in test!", e);
}
}
@Test
public void testToBytes(){
ByteBuffer buffer = ByteBuffer.allocate(5);
buffer.put(new byte[]{0,1,2,3,4});
assertEquals(5, buffer.position());
assertEquals(5, buffer.limit());
byte[] copy = ByteBufferUtils.toBytes(buffer, 2);
assertArrayEquals(new byte[]{2,3,4}, copy);
assertEquals(5, buffer.position());
assertEquals(5, buffer.limit());
}
}