HBASE-8496 - Implement tags and the internals of how a tag should look like (Ram)

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1525269 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
ramkrishna 2013-09-21 18:01:32 +00:00
parent 057551d5c7
commit a02bd8cc0d
135 changed files with 5580 additions and 1912 deletions

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
@ -96,7 +97,7 @@ public abstract class Mutation extends OperationWithAttributes implements Row, C
}
/*
* Create a nnnnnnnn with this objects row key and the Put identifier.
* Create a KeyValue with this objects row key and the Put identifier.
*
* @return a KeyValue with this objects row key and the Put identifier.
*/
@ -104,6 +105,20 @@ public abstract class Mutation extends OperationWithAttributes implements Row, C
return new KeyValue(this.row, family, qualifier, ts, KeyValue.Type.Put, value);
}
/**
* Create a KeyValue with this objects row key and the Put identifier.
* @param family
* @param qualifier
* @param ts
* @param value
* @param tags - Specify the tags as an array of {@link Tag}
* @return a KeyValue with this objects row key and the Put identifier.
*/
KeyValue createPutKeyValue(byte[] family, byte[] qualifier, long ts, byte[] value, Tag[] tags) {
KeyValue kvWithTag = new KeyValue(this.row, family, qualifier, ts, value, tags);
return kvWithTag;
}
/**
* Compile the column family (i.e. schema) information
* into a Map. Useful for parsing and aggregation by debugging,

View File

@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.util.Bytes;
@ -112,6 +113,10 @@ public class Put extends Mutation implements HeapSize, Comparable<Row> {
return add(family, qualifier, this.ts, value);
}
public Put add(byte[] family, byte [] qualifier, byte [] value, Tag[] tag) {
return add(family, qualifier, this.ts, value, tag);
}
/**
* Add the specified column and value, with the specified timestamp as
* its version to this Put operation.
@ -132,6 +137,18 @@ public class Put extends Mutation implements HeapSize, Comparable<Row> {
return this;
}
/**
* Forms a KeyValue with the given tags and adds it to this Put operation.
*/
@SuppressWarnings("unchecked")
public Put add(byte[] family, byte[] qualifier, long ts, byte[] value, Tag[] tag) {
List<Cell> list = getCellList(family);
KeyValue kv = createPutKeyValue(family, qualifier, ts, value, tag);
list.add(kv);
familyMap.put(kv.getFamily(), list);
return this;
}
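For reference, a minimal usage sketch of the new tag-aware add overload above; the row, family, qualifier, value and tag type below are placeholder values, not part of this patch.
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class TaggedPutExample {
  public static void main(String[] args) {
    Put put = new Put(Bytes.toBytes("row1"));
    // (byte) 1 is an arbitrary, application-chosen tag type for this sketch.
    Tag[] tags = new Tag[] { new Tag((byte) 1, "cell-metadata") };
    // The backing KeyValue carries the tags after the value part.
    put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), 1000L, Bytes.toBytes("v"), tags);
  }
}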
/**
* Add the specified KeyValue to this Put operation. Operation assumes that
* the passed KeyValue is immutable and its backing array will not be modified

View File

@ -40,7 +40,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
@ -50,6 +49,8 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Append;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
@ -99,12 +100,12 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.Col
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest;
import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad;
import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo;
import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType;
import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos;
@ -471,7 +472,18 @@ public final class ProtobufUtil {
if (qv.hasTimestamp()) {
ts = qv.getTimestamp();
}
put.add(family, qualifier, ts, value);
byte[] tags;
if (qv.hasTags()) {
tags = qv.getTags().toByteArray();
Object[] array = Tag.createTags(tags, 0, (short)tags.length).toArray();
Tag[] tagArray = new Tag[array.length];
for(int i = 0; i< array.length; i++) {
tagArray[i] = (Tag)array[i];
}
put.add(family, qualifier, ts, value, tagArray);
} else {
put.add(family, qualifier, ts, value);
}
}
}
}
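The Object[] round-trip in the hunk above can also be written with List.toArray; a small equivalent sketch (the helper class and method names are illustrative only):
import java.util.List;
import org.apache.hadoop.hbase.Tag;

public class TagArrayHelper {
  // Rebuilds Tag objects from a serialized tags byte[] in <taglength><tagtype><tagbytes> form.
  static Tag[] toTagArray(byte[] tagBytes) {
    List<Tag> tagList = Tag.createTags(tagBytes, 0, (short) tagBytes.length);
    return tagList.toArray(new Tag[tagList.size()]);
  }
}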
@ -972,6 +984,9 @@ public final class ProtobufUtil {
valueBuilder.setQualifier(ByteString.copyFrom(kv.getQualifier()));
valueBuilder.setValue(ByteString.copyFrom(kv.getValue()));
valueBuilder.setTimestamp(kv.getTimestamp());
if(cell.getTagsLength() > 0) {
valueBuilder.setTags(ByteString.copyFrom(CellUtil.getTagArray(kv)));
}
if (type == MutationType.DELETE) {
KeyValue.Type keyValueType = KeyValue.Type.codeToType(kv.getType());
valueBuilder.setDeleteType(toDeleteType(keyValueType));

View File

@ -53,6 +53,9 @@ public final class CellUtil {
cell.getQualifierLength());
}
public static ByteRange fillTagRange(Cell cell, ByteRange range) {
return range.set(cell.getTagsArray(), cell.getTagsOffset(), cell.getTagsLength());
}
/***************** get individual arrays for tests ************/
@ -79,6 +82,12 @@ public final class CellUtil {
copyValueTo(cell, output, 0);
return output;
}
public static byte[] getTagArray(Cell cell){
byte[] output = new byte[cell.getTagsLength()];
copyTagTo(cell, output, 0);
return output;
}
/******************** copyTo **********************************/
@ -103,10 +112,22 @@ public final class CellUtil {
public static int copyValueTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getValueArray(), cell.getValueOffset(), destination, destinationOffset,
cell.getValueLength());
cell.getValueLength());
return destinationOffset + cell.getValueLength();
}
/**
* Copies the tag bytes of the cell into the destination byte array.
* @param cell the cell whose tags are copied
* @param destination the destination byte array
* @param destinationOffset offset in the destination array to start writing at
* @return the offset just past the copied tag bytes
*/
public static int copyTagTo(Cell cell, byte[] destination, int destinationOffset) {
System.arraycopy(cell.getTagsArray(), cell.getTagsOffset(), destination, destinationOffset,
cell.getTagsLength());
return destinationOffset + cell.getTagsLength();
}
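A short sketch of how the two tag helpers above relate; the class and method names are illustrative only.
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;

public class TagCopyExample {
  // Copies a cell's tag region into a fresh array; equivalent to CellUtil.getTagArray(cell).
  static byte[] tagsOf(Cell cell) {
    byte[] dest = new byte[cell.getTagsLength()];
    int end = CellUtil.copyTagTo(cell, dest, 0); // returns the offset just past the tags
    assert end == cell.getTagsLength();
    return dest;
  }
}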
/********************* misc *************************************/
@ -134,18 +155,23 @@ public final class CellUtil {
return new KeyValue(row, family, qualifier, timestamp,
KeyValue.Type.codeToType(type), value);
}
public static Cell createCell(final byte[] row, final byte[] family, final byte[] qualifier,
final long timestamp, final byte type, final byte[] value, final long memstoreTS) {
// I need a Cell Factory here. Using KeyValue for now. TODO.
// TODO: Make a new Cell implementation that just carries these
// byte arrays.
KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp,
KeyValue.Type.codeToType(type), value);
keyValue.setMvccVersion(memstoreTS);
return keyValue;
}
public static Cell createCell(final byte[] row, final byte[] family, final byte[] qualifier,
final long timestamp, final byte type, final byte[] value, byte[] tags, final long memstoreTS) {
KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp,
KeyValue.Type.codeToType(type), value, tags);
keyValue.setMvccVersion(memstoreTS);
return keyValue;
}
/**
* @param cellScannerables
* @return CellScanner interface over <code>cellIterables</code>

View File

@ -27,9 +27,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
@ -66,7 +69,14 @@ import com.google.common.primitives.Longs;
* The <code>rowlength</code> maximum is <code>Short.MAX_SIZE</code>, column family length maximum
* is <code>Byte.MAX_SIZE</code>, and column qualifier + key length must be <
* <code>Integer.MAX_SIZE</code>. The column does not contain the family/qualifier delimiter,
* {@link #COLUMN_FAMILY_DELIMITER}
* {@link #COLUMN_FAMILY_DELIMITER}<br>
* A KeyValue can optionally carry Tags. When present, the tags are appended to the byte array
* after the value part, in the format <code>&lt;tagslength>&lt;tagsbytes></code>.
* <code>tagslength</code> maximum is <code>Short.MAX_SIZE</code>. The <code>tagsbytes</code>
* contain one or more tags, where each tag is of the form
* <code>&lt;taglength>&lt;tagtype>&lt;tagbytes></code>. <code>tagtype</code> is one byte and
* <code>taglength</code> maximum is <code>Short.MAX_SIZE</code>; it covers the 1-byte type plus
* the actual tag bytes.
*/
@InterfaceAudience.Private
public class KeyValue implements Cell, HeapSize, Cloneable {
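As a worked illustration of the layout described in the class comment, the sketch below builds a KeyValue with one 4-byte tag; the 7-byte tags length breaks down as 2 (taglength) + 1 (tagtype) + 4 (tagbytes). The row, family, qualifier and value are placeholders.
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.Bytes;

public class TaggedKeyValueLayout {
  public static void main(String[] args) {
    Tag[] tags = new Tag[] { new Tag((byte) 1, Bytes.toBytes("meta")) };
    KeyValue kv = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"),
        Bytes.toBytes("q"), 1L, Bytes.toBytes("v"), tags);
    // <tagslength:2 bytes><tagsbytes> follow the value; each tag is <taglength><tagtype><tagbytes>.
    System.out.println(kv.getTagsLength());      // 7 = 2 + 1 + 4
    List<Tag> parsed = kv.getTags();
    System.out.println(parsed.get(0).getType()); // 1
  }
}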
@ -127,6 +137,11 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
// Size of the length ints in a KeyValue datastructure.
public static final int KEYVALUE_INFRASTRUCTURE_SIZE = ROW_OFFSET;
/** Size of the tags length field in bytes */
public static final int TAGS_LENGTH_SIZE = Bytes.SIZEOF_SHORT;
public static final int KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE = ROW_OFFSET + TAGS_LENGTH_SIZE;
/**
* Computes the number of bytes that a <code>KeyValue</code> instance with the provided
* characteristics would take up for its underlying data structure.
@ -140,8 +155,46 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
*/
public static long getKeyValueDataStructureSize(int rlength,
int flength, int qlength, int vlength) {
return KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE +
getKeyDataStructureSize(rlength, flength, qlength) + vlength;
return KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE
+ getKeyDataStructureSize(rlength, flength, qlength) + vlength;
}
/**
* Computes the number of bytes that a <code>KeyValue</code> instance with the provided
* characteristics would take up for its underlying data structure.
*
* @param rlength row length
* @param flength family length
* @param qlength qualifier length
* @param vlength value length
* @param tagsLength total length of the tags
*
* @return the <code>KeyValue</code> data structure length
*/
public static long getKeyValueDataStructureSize(int rlength, int flength, int qlength,
int vlength, int tagsLength) {
if (tagsLength == 0) {
return getKeyValueDataStructureSize(rlength, flength, qlength, vlength);
}
return KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE
+ getKeyDataStructureSize(rlength, flength, qlength) + vlength + tagsLength;
}
/**
* Computes the number of bytes that a <code>KeyValue</code> instance with the provided
* characteristics would take up for its underlying data structure.
*
* @param klength key length
* @param vlength value length
* @param tagsLength total length of the tags
*
* @return the <code>KeyValue</code> data structure length
*/
public static long getKeyValueDataStructureSize(int klength, int vlength, int tagsLength) {
if (tagsLength == 0) {
return KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + klength + vlength;
}
return KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + klength + vlength + tagsLength;
}
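A quick sanity check of the two size overloads above; the concrete lengths are arbitrary, only the relationship between the results matters.
import org.apache.hadoop.hbase.KeyValue;

public class KeyValueSizeCheck {
  public static void main(String[] args) {
    long plain = KeyValue.getKeyValueDataStructureSize(3, 1, 1, 5);
    long tagged = KeyValue.getKeyValueDataStructureSize(3, 1, 1, 5, 7);
    // Adding 7 bytes of tags grows the structure by TAGS_LENGTH_SIZE (2) + 7.
    assert tagged - plain == KeyValue.TAGS_LENGTH_SIZE + 7;
    // With tagsLength == 0 both overloads agree.
    assert KeyValue.getKeyValueDataStructureSize(3, 1, 1, 5, 0) == plain;
  }
}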
/**
@ -201,6 +254,38 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
}
}
/**
* @return an iterator over the tags in this KeyValue.
*/
public Iterator<Tag> tagsIterator() {
// Subtract 1 so that endOffset points at the last byte of the backing region (where the tags end)
final int endOffset = this.offset + this.length - 1;
return new Iterator<Tag>() {
private int pos = getTagsOffset();
@Override
public boolean hasNext() {
return this.pos < endOffset;
}
@Override
public Tag next() {
if (hasNext()) {
short curTagLen = Bytes.toShort(bytes, this.pos);
Tag tag = new Tag(bytes, pos, (short) (curTagLen + Bytes.SIZEOF_SHORT));
this.pos += Bytes.SIZEOF_SHORT + curTagLen;
return tag;
}
return null;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
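A minimal sketch of walking the tags with the iterator above; the tag types and values are placeholders.
import java.util.Iterator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.Bytes;

public class TagIterationExample {
  public static void main(String[] args) {
    KeyValue kv = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("q"), 1L,
        Bytes.toBytes("v"), new Tag[] { new Tag((byte) 1, "a"), new Tag((byte) 2, "b") });
    Iterator<Tag> it = kv.tagsIterator();
    while (it.hasNext()) {
      Tag t = it.next();
      System.out.println(t.getType() + " -> " + Bytes.toString(t.getValue()));
    }
  }
}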
/**
* Lowest possible key.
* Makes a Key with highest possible Timestamp, empty row and column. No
@ -365,6 +450,42 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
this(row, family, qualifier, timestamp, Type.Put, value);
}
/**
* Constructs KeyValue structure filled with specified values.
* @param row row key
* @param family family name
* @param qualifier column qualifier
* @param timestamp version timestamp
* @param value column value
* @param tags tags
* @throws IllegalArgumentException
*/
public KeyValue(final byte[] row, final byte[] family,
final byte[] qualifier, final long timestamp, final byte[] value,
final Tag[] tags) {
this(row, family, qualifier, timestamp, value, Arrays.asList(tags));
}
/**
* Constructs KeyValue structure filled with specified values.
* @param row row key
* @param family family name
* @param qualifier column qualifier
* @param timestamp version timestamp
* @param value column value
* @param tags non-empty list of tags or null
* @throws IllegalArgumentException
*/
public KeyValue(final byte[] row, final byte[] family,
final byte[] qualifier, final long timestamp, final byte[] value,
final List<Tag> tags) {
this(row, 0, row==null ? 0 : row.length,
family, 0, family==null ? 0 : family.length,
qualifier, 0, qualifier==null ? 0 : qualifier.length,
timestamp, Type.Put,
value, 0, value==null ? 0 : value.length, tags);
}
/**
* Constructs KeyValue structure filled with specified values.
* @param row row key
@ -382,6 +503,144 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
timestamp, type, value, 0, len(value));
}
/**
* Constructs KeyValue structure filled with specified values.
* <p>
* Column is split into two fields, family and qualifier.
* @param row row key
* @param family family name
* @param qualifier column qualifier
* @param timestamp version timestamp
* @param type key type
* @param value column value
* @throws IllegalArgumentException
*/
public KeyValue(final byte[] row, final byte[] family,
final byte[] qualifier, final long timestamp, Type type,
final byte[] value, final List<Tag> tags) {
this(row, family, qualifier, 0, qualifier==null ? 0 : qualifier.length,
timestamp, type, value, 0, value==null ? 0 : value.length, tags);
}
/**
* Constructs KeyValue structure filled with specified values.
* @param row row key
* @param family family name
* @param qualifier column qualifier
* @param timestamp version timestamp
* @param type key type
* @param value column value
* @throws IllegalArgumentException
*/
public KeyValue(final byte[] row, final byte[] family,
final byte[] qualifier, final long timestamp, Type type,
final byte[] value, final byte[] tags) {
this(row, family, qualifier, 0, qualifier==null ? 0 : qualifier.length,
timestamp, type, value, 0, value==null ? 0 : value.length, tags);
}
/**
* Constructs KeyValue structure filled with specified values.
* @param row row key
* @param family family name
* @param qualifier column qualifier
* @param qoffset qualifier offset
* @param qlength qualifier length
* @param timestamp version timestamp
* @param type key type
* @param value column value
* @param voffset value offset
* @param vlength value length
* @throws IllegalArgumentException
*/
public KeyValue(byte [] row, byte [] family,
byte [] qualifier, int qoffset, int qlength, long timestamp, Type type,
byte [] value, int voffset, int vlength, List<Tag> tags) {
this(row, 0, row==null ? 0 : row.length,
family, 0, family==null ? 0 : family.length,
qualifier, qoffset, qlength, timestamp, type,
value, voffset, vlength, tags);
}
/**
* @param row
* @param family
* @param qualifier
* @param qoffset
* @param qlength
* @param timestamp
* @param type
* @param value
* @param voffset
* @param vlength
* @param tags
*/
public KeyValue(byte [] row, byte [] family,
byte [] qualifier, int qoffset, int qlength, long timestamp, Type type,
byte [] value, int voffset, int vlength, byte[] tags) {
this(row, 0, row==null ? 0 : row.length,
family, 0, family==null ? 0 : family.length,
qualifier, qoffset, qlength, timestamp, type,
value, voffset, vlength, tags, 0, tags==null ? 0 : tags.length);
}
/**
* Constructs KeyValue structure filled with specified values.
* <p>
* Column is split into two fields, family and qualifier.
* @param row row key
* @throws IllegalArgumentException
*/
public KeyValue(final byte [] row, final int roffset, final int rlength,
final byte [] family, final int foffset, final int flength,
final byte [] qualifier, final int qoffset, final int qlength,
final long timestamp, final Type type,
final byte [] value, final int voffset, final int vlength) {
this(row, roffset, rlength, family, foffset, flength, qualifier, qoffset,
qlength, timestamp, type, value, voffset, vlength, null);
}
/**
* Constructs KeyValue structure filled with specified values. Uses the provided buffer as the
* data buffer.
* <p>
* Column is split into two fields, family and qualifier.
*
* @param buffer the bytes buffer to use
* @param boffset buffer offset
* @param row row key
* @param roffset row offset
* @param rlength row length
* @param family family name
* @param foffset family offset
* @param flength family length
* @param qualifier column qualifier
* @param qoffset qualifier offset
* @param qlength qualifier length
* @param timestamp version timestamp
* @param type key type
* @param value column value
* @param voffset value offset
* @param vlength value length
* @param tags non-empty list of tags or null
* @throws IllegalArgumentException an illegal value was passed or there is insufficient space
* remaining in the buffer
*/
public KeyValue(byte [] buffer, final int boffset,
final byte [] row, final int roffset, final int rlength,
final byte [] family, final int foffset, final int flength,
final byte [] qualifier, final int qoffset, final int qlength,
final long timestamp, final Type type,
final byte [] value, final int voffset, final int vlength,
final Tag[] tags) {
this.bytes = buffer;
this.length = writeByteArray(buffer, boffset,
row, roffset, rlength,
family, foffset, flength, qualifier, qoffset, qlength,
timestamp, type, value, voffset, vlength, tags);
this.offset = boffset;
}
/**
* Constructs KeyValue structure filled with specified values.
* <p>
@ -400,16 +659,48 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
* @param value column value
* @param voffset value offset
* @param vlength value length
* @param tags tags
* @throws IllegalArgumentException
*/
public KeyValue(final byte [] row, final int roffset, final int rlength,
final byte [] family, final int foffset, final int flength,
final byte [] qualifier, final int qoffset, final int qlength,
final long timestamp, final Type type,
final byte [] value, final int voffset, final int vlength) {
final byte [] value, final int voffset, final int vlength,
final List<Tag> tags) {
this.bytes = createByteArray(row, roffset, rlength,
family, foffset, flength, qualifier, qoffset, qlength,
timestamp, type, value, voffset, vlength);
timestamp, type, value, voffset, vlength, tags);
this.length = bytes.length;
this.offset = 0;
}
/**
* @param row
* @param roffset
* @param rlength
* @param family
* @param foffset
* @param flength
* @param qualifier
* @param qoffset
* @param qlength
* @param timestamp
* @param type
* @param value
* @param voffset
* @param vlength
* @param tags
*/
public KeyValue(final byte [] row, final int roffset, final int rlength,
final byte [] family, final int foffset, final int flength,
final byte [] qualifier, final int qoffset, final int qlength,
final long timestamp, final Type type,
final byte [] value, final int voffset, final int vlength,
final byte[] tags, final int tagsOffset, final int tagsLength) {
this.bytes = createByteArray(row, roffset, rlength,
family, foffset, flength, qualifier, qoffset, qlength,
timestamp, type, value, voffset, vlength, tags, tagsOffset, tagsLength);
this.length = bytes.length;
this.offset = 0;
}
@ -432,9 +723,30 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
final int qlength,
final long timestamp, final Type type,
final int vlength) {
this.bytes = createEmptyByteArray(rlength,
flength, qlength,
timestamp, type, vlength);
this(rlength, flength, qlength, timestamp, type, vlength, 0);
}
/**
* Constructs an empty KeyValue structure, with specified sizes.
* This can be used to partially fill up KeyValues.
* <p>
* Column is split into two fields, family and qualifier.
* @param rlength row length
* @param flength family length
* @param qlength qualifier length
* @param timestamp version timestamp
* @param type key type
* @param vlength value length
* @param tagsLength
* @throws IllegalArgumentException
*/
public KeyValue(final int rlength,
final int flength,
final int qlength,
final long timestamp, final Type type,
final int vlength, final int tagsLength) {
this.bytes = createEmptyByteArray(rlength, flength, qlength, timestamp, type, vlength,
tagsLength);
this.length = bytes.length;
this.offset = 0;
}
@ -459,7 +771,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
* @return The newly created byte array.
*/
private static byte[] createEmptyByteArray(final int rlength, int flength,
int qlength, final long timestamp, final Type type, int vlength) {
int qlength, final long timestamp, final Type type, int vlength, int tagsLength) {
if (rlength > Short.MAX_VALUE) {
throw new IllegalArgumentException("Row > " + Short.MAX_VALUE);
}
@ -470,6 +782,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
if (qlength > Integer.MAX_VALUE - rlength - flength) {
throw new IllegalArgumentException("Qualifier > " + Integer.MAX_VALUE);
}
checkForTagsLength(tagsLength);
// Key length
long longkeylength = getKeyDataStructureSize(rlength, flength, qlength);
if (longkeylength > Integer.MAX_VALUE) {
@ -484,8 +797,8 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
}
// Allocate right-sized byte array.
byte [] bytes =
new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength)];
byte[] bytes= new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength,
tagsLength)];
// Write the correct size markers
int pos = 0;
pos = Bytes.putInt(bytes, pos, keylength);
@ -496,6 +809,10 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
pos += flength + qlength;
pos = Bytes.putLong(bytes, pos, timestamp);
pos = Bytes.putByte(bytes, pos, type.getCode());
pos += keylength + vlength;
if (tagsLength > 0) {
pos = Bytes.putShort(bytes, pos, (short)(tagsLength & 0x0000ffff));
}
return bytes;
}
@ -518,7 +835,6 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
final byte [] qualifier, int qlength,
final byte [] value, int vlength)
throws IllegalArgumentException {
if (rlength > Short.MAX_VALUE) {
throw new IllegalArgumentException("Row > " + Short.MAX_VALUE);
}
@ -579,12 +895,21 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
final byte [] family, final int foffset, int flength,
final byte [] qualifier, final int qoffset, int qlength,
final long timestamp, final Type type,
final byte [] value, final int voffset, int vlength) {
final byte [] value, final int voffset, int vlength, Tag[] tags) {
checkParameters(row, rlength, family, flength, qualifier, qlength, value, vlength);
// Calculate length of tags area
int tagsLength = 0;
if (tags != null && tags.length > 0) {
for (Tag t: tags) {
tagsLength += t.getLength();
}
}
checkForTagsLength(tagsLength);
int keyLength = (int) getKeyDataStructureSize(rlength, flength, qlength);
int keyValueLength = (int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength);
int keyValueLength = (int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength,
tagsLength);
if (keyValueLength > buffer.length - boffset) {
throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) + " < " +
keyValueLength);
@ -608,13 +933,24 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
if (value != null && value.length > 0) {
pos = Bytes.putBytes(buffer, pos, value, voffset, vlength);
}
// Write the number of tags. If it is 0 then it means there are no tags.
if (tagsLength > 0) {
pos = Bytes.putShort(buffer, pos, (short) tagsLength);
for (Tag t : tags) {
pos = Bytes.putBytes(buffer, pos, t.getBuffer(), t.getOffset(), t.getLength());
}
}
return keyValueLength;
}
private static void checkForTagsLength(int tagsLength) {
if (tagsLength > Short.MAX_VALUE) {
throw new IllegalArgumentException("tagslength "+ tagsLength + " > " + Short.MAX_VALUE);
}
}
/**
* Write KeyValue format into a byte array.
*
* @param row row key
* @param roffset row offset
* @param rlength row length
@ -635,14 +971,15 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
final int rlength, final byte [] family, final int foffset, int flength,
final byte [] qualifier, final int qoffset, int qlength,
final long timestamp, final Type type,
final byte [] value, final int voffset, int vlength) {
final byte [] value, final int voffset,
int vlength, byte[] tags, int tagsOffset, int tagsLength) {
checkParameters(row, rlength, family, flength, qualifier, qlength, value, vlength);
checkForTagsLength(tagsLength);
// Allocate right-sized byte array.
int keyLength = (int) getKeyDataStructureSize(rlength, flength, qlength);
byte [] bytes =
new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength)];
new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength, tagsLength)];
// Write key, value and key row length.
int pos = 0;
pos = Bytes.putInt(bytes, pos, keyLength);
@ -661,8 +998,64 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
if (value != null && value.length > 0) {
pos = Bytes.putBytes(bytes, pos, value, voffset, vlength);
}
// Add the tags after the value part
if (tagsLength > 0) {
pos = Bytes.putShort(bytes, pos, (short) (tagsLength));
pos = Bytes.putBytes(bytes, pos, tags, tagsOffset, tagsLength);
}
return bytes;
}
private static byte [] createByteArray(final byte [] row, final int roffset,
final int rlength, final byte [] family, final int foffset, int flength,
final byte [] qualifier, final int qoffset, int qlength,
final long timestamp, final Type type,
final byte [] value, final int voffset, int vlength, List<Tag> tags) {
checkParameters(row, rlength, family, flength, qualifier, qlength, value, vlength);
// Calculate length of tags area
int tagsLength = 0;
if (tags != null && !tags.isEmpty()) {
for (Tag t : tags) {
tagsLength += t.getLength();
}
}
checkForTagsLength(tagsLength);
// Allocate right-sized byte array.
int keyLength = (int) getKeyDataStructureSize(rlength, flength, qlength);
byte[] bytes = new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength,
tagsLength)];
// Write key, value and key row length.
int pos = 0;
pos = Bytes.putInt(bytes, pos, keyLength);
pos = Bytes.putInt(bytes, pos, vlength);
pos = Bytes.putShort(bytes, pos, (short)(rlength & 0x0000ffff));
pos = Bytes.putBytes(bytes, pos, row, roffset, rlength);
pos = Bytes.putByte(bytes, pos, (byte)(flength & 0x0000ff));
if(flength != 0) {
pos = Bytes.putBytes(bytes, pos, family, foffset, flength);
}
if(qlength != 0) {
pos = Bytes.putBytes(bytes, pos, qualifier, qoffset, qlength);
}
pos = Bytes.putLong(bytes, pos, timestamp);
pos = Bytes.putByte(bytes, pos, type.getCode());
if (value != null && value.length > 0) {
pos = Bytes.putBytes(bytes, pos, value, voffset, vlength);
}
// Add the tags after the value part
if (tagsLength > 0) {
pos = Bytes.putShort(bytes, pos, (short) (tagsLength));
for (Tag t : tags) {
pos = Bytes.putBytes(bytes, pos, t.getBuffer(), t.getOffset(), t.getLength());
}
}
return bytes;
}
/**
* Needed doing 'contains' on List. Only compares the key portion, not the value.
@ -743,13 +1136,6 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
return keyToString(k, 0, k.length);
}
/**
* Use for logging.
* @param b Key portion of a KeyValue.
* @param o Offset to start of key
* @param l Length of key.
* @return Key as a String.
*/
/**
* Produces a string map for this key/value pair. Useful for programmatic use
* and manipulation of the data stored in an HLogKey, for example, printing
@ -765,9 +1151,24 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
stringMap.put("qualifier", Bytes.toStringBinary(getQualifier()));
stringMap.put("timestamp", getTimestamp());
stringMap.put("vlen", getValueLength());
List<Tag> tags = getTags();
if (tags != null) {
List<String> tagsString = new ArrayList<String>();
for (Tag t : tags) {
tagsString.add((t.getType()) + ":" +Bytes.toStringBinary(t.getValue()));
}
stringMap.put("tag", tagsString);
}
return stringMap;
}
/**
* Use for logging.
* @param b Key portion of a KeyValue.
* @param o Offset to start of key
* @param l Length of key.
* @return Key as a String.
*/
public static String keyToString(final byte [] b, final int o, final int l) {
if (b == null) return "";
int rowlength = Bytes.toShort(b, o);
@ -839,9 +1240,9 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
* @return length of entire KeyValue, in bytes
*/
private static int getLength(byte [] bytes, int offset) {
return ROW_OFFSET +
Bytes.toInt(bytes, offset) +
Bytes.toInt(bytes, offset + Bytes.SIZEOF_INT);
int klength = ROW_OFFSET + Bytes.toInt(bytes, offset);
int vlength = Bytes.toInt(bytes, offset + Bytes.SIZEOF_INT);
return klength + vlength;
}
/**
@ -876,11 +1277,12 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
}
/**
* @return Value offset
* @return the value offset
*/
@Override
public int getValueOffset() {
return getKeyOffset() + getKeyLength();
int voffset = getKeyOffset() + getKeyLength();
return voffset;
}
/**
@ -888,7 +1290,8 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
*/
@Override
public int getValueLength() {
return Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT);
int vlength = Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT);
return vlength;
}
/**
@ -1185,6 +1588,55 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
return CellUtil.cloneQualifier(this);
}
/**
* This returns the offset where the tag actually starts.
*/
@Override
public int getTagsOffset() {
short tagsLen = getTagsLength();
if (tagsLen == 0) {
return this.offset + this.length;
}
return this.offset + this.length - tagsLen;
}
/**
* This returns the total length of the tag bytes
*/
@Override
public short getTagsLength() {
int tagsLen = this.length - (getKeyLength() + getValueLength() + KEYVALUE_INFRASTRUCTURE_SIZE);
if (tagsLen > 0) {
// There are tag bytes in the byte[], so subtract the 2 bytes that store the tags length
// itself
tagsLen -= TAGS_LENGTH_SIZE;
}
return (short) tagsLen;
}
/**
* Returns any tags embedded in the KeyValue. This method may not be right: we cannot use
* CellUtil.getTagIterator here because getKeyOffset and getKeyLength are not part of the Cell
* interface.
* @return The tags
*/
public List<Tag> getTags() {
short tagsLength = getTagsLength();
if (tagsLength == 0) {
return new ArrayList<Tag>();
}
return Tag.createTags(getBuffer(), getTagsOffset(), tagsLength);
}
/**
* @return the backing array of the entire KeyValue (all KeyValue fields are in a single array)
*/
@Override
public byte[] getTagsArray() {
return bytes;
}
//---------------------------------------------------------------------------
//
// Compare specified fields against those contained in this KeyValue
@ -2169,7 +2621,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
int len = writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset, flength,
qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum,
null, 0, 0);
null, 0, 0, null);
return new KeyValue(buffer, boffset, len);
}
@ -2424,22 +2876,4 @@ public class KeyValue implements Cell, HeapSize, Cloneable {
sum += Bytes.SIZEOF_LONG;// memstoreTS
return ClassSize.align(sum);
}
// -----
// KV tags stubs
@Override
public int getTagsOffset() {
throw new UnsupportedOperationException("Not implememnted");
}
@Override
public short getTagsLength() {
throw new UnsupportedOperationException("Not implememnted");
}
@Override
public byte[] getTagsArray() {
throw new UnsupportedOperationException("Not implememnted");
}
}

View File

@ -93,12 +93,12 @@ public class KeyValueTestUtil {
}
public static List<KeyValue> rewindThenToList(final ByteBuffer bb,
final boolean includesMemstoreTS) {
final boolean includesMemstoreTS, final boolean useTags) {
bb.rewind();
List<KeyValue> kvs = Lists.newArrayList();
KeyValue kv = null;
while (true) {
kv = KeyValueUtil.nextShallowCopy(bb, includesMemstoreTS);
kv = KeyValueUtil.nextShallowCopy(bb, includesMemstoreTS, useTags);
if (kv == null) {
break;
}

View File

@ -24,9 +24,9 @@ import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.SimpleByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IterableUtils;
import org.apache.hadoop.hbase.util.SimpleByteRange;
import org.apache.hadoop.io.WritableUtils;
import com.google.common.base.Function;
@ -41,8 +41,9 @@ public class KeyValueUtil {
/**************** length *********************/
public static int length(final Cell cell) {
return (int)KeyValue.getKeyValueDataStructureSize(cell.getRowLength(), cell.getFamilyLength(),
cell.getQualifierLength(), cell.getValueLength());
return (int) (KeyValue.getKeyValueDataStructureSize(cell.getRowLength(),
cell.getFamilyLength(), cell.getQualifierLength(), cell.getValueLength(),
cell.getTagsLength()));
}
protected static int keyLength(final Cell cell) {
@ -71,7 +72,8 @@ public class KeyValueUtil {
/**************** copy key only *********************/
public static KeyValue copyToNewKeyValue(final Cell cell) {
KeyValue kvCell = new KeyValue(copyToNewByteArray(cell));
byte[] bytes = copyToNewByteArray(cell);
KeyValue kvCell = new KeyValue(bytes, 0, bytes.length);
kvCell.setMvccVersion(cell.getMvccVersion());
return kvCell;
}
@ -112,8 +114,12 @@ public class KeyValueUtil {
pos = Bytes.putInt(output, pos, keyLength(cell));
pos = Bytes.putInt(output, pos, cell.getValueLength());
pos = appendKeyToByteArrayWithoutValue(cell, output, pos);
CellUtil.copyValueTo(cell, output, pos);
return pos + cell.getValueLength();
pos = CellUtil.copyValueTo(cell, output, pos);
if ((cell.getTagsLength() > 0)) {
pos = Bytes.putShort(output, pos, cell.getTagsLength());
pos = CellUtil.copyTagTo(cell, output, pos);
}
return pos;
}
public static ByteBuffer copyToNewByteBuffer(final Cell cell) {
@ -142,20 +148,30 @@ public class KeyValueUtil {
/**
* Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's
* position to the start of the next KeyValue. Does not allocate a new array or copy data.
* @param bb
* @param includesMvccVersion
* @param includesTags
*/
public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion) {
public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion,
boolean includesTags) {
if (bb.isDirect()) {
throw new IllegalArgumentException("only supports heap buffers");
}
if (bb.remaining() < 1) {
return null;
}
KeyValue keyValue = null;
int underlyingArrayOffset = bb.arrayOffset() + bb.position();
int keyLength = bb.getInt();
int valueLength = bb.getInt();
int kvLength = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keyLength + valueLength;
KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
ByteBufferUtils.skip(bb, keyLength + valueLength);
short tagsLength = 0;
if (includesTags) {
tagsLength = bb.getShort();
ByteBufferUtils.skip(bb, tagsLength);
}
int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength);
keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength);
if (includesMvccVersion) {
long mvccVersion = ByteBufferUtils.readVLong(bb);
keyValue.setMvccVersion(mvccVersion);

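A hedged sketch of consuming a heap buffer of serialized KeyValues with the new overload above; whether tags and mvcc versions are present must match how the buffer was written, and the class and method names are illustrative only.
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;

public class ShallowCopyWalk {
  static void readAll(ByteBuffer bb) {
    bb.rewind();
    KeyValue kv;
    // includesMvccVersion = false, includesTags = true for this sketch.
    while ((kv = KeyValueUtil.nextShallowCopy(bb, false, true)) != null) {
      System.out.println(kv);
    }
  }
}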
View File

@ -0,0 +1,174 @@
/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
/**
* Represents a single tag in the serialized form
* <code>&lt;taglength>&lt;tagtype>&lt;tagbytes></code>. <code>tagtype</code> is
* one byte and <code>taglength</code> maximum is <code>Short.MAX_SIZE</code>;
* it covers the 1-byte type plus the actual tag bytes.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class Tag {
public final static int TYPE_LENGTH_SIZE = Bytes.SIZEOF_BYTE;
public final static int TAG_LENGTH_SIZE = Bytes.SIZEOF_SHORT;
public final static int INFRASTRUCTURE_SIZE = TYPE_LENGTH_SIZE + TAG_LENGTH_SIZE;
private byte type;
private byte[] bytes;
private int offset = 0;
private short length = 0;
// Serialized form: the length of each tag is written first, followed by the
// one-byte type and then the actual tag bytes. So after parsing the length we
// skip one more byte (the type) to reach the actual tag.
public Tag(byte tagType, String tag) {
this(tagType, Bytes.toBytes(tag));
}
/**
* @param tagType
* @param tag
*/
public Tag(byte tagType, byte[] tag) {
// <length of tag - 2 bytes><type code - 1 byte><tag>
short tagLength = (short) ((tag.length & 0x0000ffff) + TYPE_LENGTH_SIZE);
length = (short) (TAG_LENGTH_SIZE + tagLength);
bytes = new byte[length];
int pos = Bytes.putShort(bytes, 0, tagLength);
pos = Bytes.putByte(bytes, pos, tagType);
Bytes.putBytes(bytes, pos, tag, 0, tag.length);
this.type = tagType;
}
/**
* Creates a Tag from the specified byte array and offset. Presumes
* <code>bytes</code> content starting at <code>offset</code> is formatted as
* a Tag blob.
* The byte array must include the tag length, the tag type and the actual tag bytes.
* @param bytes
* byte array
* @param offset
* offset to start of Tag
*/
public Tag(byte[] bytes, int offset) {
this(bytes, offset, getLength(bytes, offset));
}
private static short getLength(byte[] bytes, int offset) {
return (short) (TAG_LENGTH_SIZE + Bytes.toShort(bytes, offset));
}
/**
* Creates a Tag from the specified byte array, starting at offset, and for
* length <code>length</code>. Presumes <code>bytes</code> content starting at
* <code>offset</code> is formatted as a Tag blob.
* @param bytes
* byte array
* @param offset
* offset to start of the Tag
* @param length
* length of the Tag
*/
public Tag(byte[] bytes, int offset, short length) {
this.bytes = bytes;
this.offset = offset;
this.length = length;
this.type = bytes[offset + TAG_LENGTH_SIZE];
}
/**
* @return The byte array backing this Tag.
*/
public byte[] getBuffer() {
return this.bytes;
}
/**
* @return the tag type
*/
public byte getType() {
return this.type;
}
/**
* @return Length of actual tag bytes within the backing buffer
*/
public int getTagLength() {
return this.length - INFRASTRUCTURE_SIZE;
}
/**
* @return Offset of actual tag bytes within the backing buffer
*/
public int getTagOffset() {
return this.offset + INFRASTRUCTURE_SIZE;
}
public byte[] getValue() {
int tagLength = getTagLength();
byte[] tag = new byte[tagLength];
Bytes.putBytes(tag, 0, bytes, getTagOffset(), tagLength);
return tag;
}
/**
* Creates the list of tags from the byte array b. Assumes b is in the
* serialized tag format
* @param b
* @param offset
* @param length
* @return List of tags
*/
public static List<Tag> createTags(byte[] b, int offset, short length) {
List<Tag> tags = new ArrayList<Tag>();
int pos = offset;
while (pos < offset + length) {
short tagLen = Bytes.toShort(b, pos);
tags.add(new Tag(b, pos, (short) (tagLen + TAG_LENGTH_SIZE)));
pos += TAG_LENGTH_SIZE + tagLen;
}
return tags;
}
/**
* Returns the total length of the entire tag entity, including the length and type bytes
* @return total length of the tag
*/
short getLength() {
return this.length;
}
/**
* Returns the offset of the entire tag entity within the backing buffer
* @return offset of the tag
*/
int getOffset() {
return this.offset;
}
}
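A small round-trip sketch for the new Tag class; the type byte and value are arbitrary examples.
import java.util.List;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.Bytes;

public class TagRoundTrip {
  public static void main(String[] args) {
    Tag tag = new Tag((byte) 7, Bytes.toBytes("acl:admin"));
    // The backing buffer holds <taglength><tagtype><tagbytes>.
    byte[] serialized = tag.getBuffer();
    List<Tag> parsed = Tag.createTags(serialized, 0, (short) serialized.length);
    System.out.println(parsed.get(0).getType());                  // 7
    System.out.println(Bytes.toString(parsed.get(0).getValue())); // acl:admin
  }
}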

View File

@ -53,6 +53,8 @@ public class CellCodec implements Codec {
this.out.write(cell.getTypeByte());
// Value
write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
// Write tags
write(cell.getTagsArray(), cell.getTagsOffset(), cell.getTagsLength());
// MvccVersion
this.out.write(Bytes.toBytes(cell.getMvccVersion()));
}
@ -85,11 +87,12 @@ public class CellCodec implements Codec {
long timestamp = Bytes.toLong(longArray);
byte type = (byte) this.in.read();
byte [] value = readByteArray(in);
byte[] tags = readByteArray(in);
// Read memstore version
byte[] memstoreTSArray = new byte[Bytes.SIZEOF_LONG];
IOUtils.readFully(this.in, memstoreTSArray);
long memstoreTS = Bytes.toLong(memstoreTSArray);
return CellUtil.createCell(row, family, qualifier, timestamp, type, value, memstoreTS);
return CellUtil.createCell(row, family, qualifier, timestamp, type, value, tags, memstoreTS);
}
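A hedged round-trip sketch for the CellCodec changes above; the getEncoder/getDecoder accessors come from the standard Codec interface and are assumed here rather than shown in this patch.
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.codec.CellCodec;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.util.Bytes;

public class CellCodecTagRoundTrip {
  public static void main(String[] args) throws Exception {
    KeyValue kv = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("q"), 1L,
        Bytes.toBytes("v"), new Tag[] { new Tag((byte) 1, "t") });
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    Codec.Encoder encoder = new CellCodec().getEncoder(out);
    encoder.write(kv);
    encoder.flush();
    Codec.Decoder decoder =
        new CellCodec().getDecoder(new ByteArrayInputStream(out.toByteArray()));
    decoder.advance();
    Cell back = decoder.current();
    System.out.println(back.getTagsLength()); // tags survive the round trip
  }
}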
/**

View File

@ -26,8 +26,8 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.KeyValue.SamePrefixComparator;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableUtils;
@ -42,8 +42,15 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
@Override
public ByteBuffer decodeKeyValues(DataInputStream source,
boolean includesMemstoreTS) throws IOException {
return decodeKeyValues(source, 0, 0, includesMemstoreTS);
HFileBlockDecodingContext blkDecodingCtx) throws IOException {
if (blkDecodingCtx.getClass() != HFileBlockDefaultDecodingContext.class) {
throw new IOException(this.getClass().getName() + " only accepts "
+ HFileBlockDefaultDecodingContext.class.getName() + " as the decoding context.");
}
HFileBlockDefaultDecodingContext decodingCtx =
(HFileBlockDefaultDecodingContext) blkDecodingCtx;
return internalDecodeKeyValues(source, 0, 0, decodingCtx);
}
protected static class SeekerState {
@ -51,6 +58,8 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
protected int keyLength;
protected int valueLength;
protected int lastCommonPrefix;
protected int tagLength = 0;
protected int tagOffset = -1;
/** We need to store a copy of the key. */
protected byte[] keyBuffer = new byte[INITIAL_KEY_BUFFER_SIZE];
@ -112,21 +121,30 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
protected abstract static class
BufferedEncodedSeeker<STATE extends SeekerState>
implements EncodedSeeker {
protected HFileBlockDecodingContext decodingCtx;
protected final KVComparator comparator;
protected final SamePrefixComparator<byte[]> samePrefixComparator;
protected ByteBuffer currentBuffer;
protected STATE current = createSeekerState(); // always valid
protected STATE previous = createSeekerState(); // may not be valid
@SuppressWarnings("unchecked")
public BufferedEncodedSeeker(KVComparator comparator) {
public BufferedEncodedSeeker(KVComparator comparator,
HFileBlockDecodingContext decodingCtx) {
this.comparator = comparator;
if (comparator instanceof SamePrefixComparator) {
this.samePrefixComparator = (SamePrefixComparator<byte[]>) comparator;
} else {
this.samePrefixComparator = null;
}
this.decodingCtx = decodingCtx;
}
protected boolean includesMvcc() {
return this.decodingCtx.getHFileContext().shouldIncludeMvcc();
}
protected boolean includesTags() {
return this.decodingCtx.getHFileContext().shouldIncludeTags();
}
@Override
@ -152,21 +170,33 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
@Override
public ByteBuffer getKeyValueBuffer() {
ByteBuffer kvBuffer = ByteBuffer.allocate(
2 * Bytes.SIZEOF_INT + current.keyLength + current.valueLength);
ByteBuffer kvBuffer = createKVBuffer();
kvBuffer.putInt(current.keyLength);
kvBuffer.putInt(current.valueLength);
kvBuffer.put(current.keyBuffer, 0, current.keyLength);
kvBuffer.put(currentBuffer.array(),
currentBuffer.arrayOffset() + current.valueOffset,
current.valueLength);
if (current.tagLength > 0) {
kvBuffer.putShort((short) current.tagLength);
kvBuffer.put(currentBuffer.array(), currentBuffer.arrayOffset() + current.tagOffset,
current.tagLength);
}
return kvBuffer;
}
protected ByteBuffer createKVBuffer() {
int kvBufSize = (int) KeyValue.getKeyValueDataStructureSize(current.keyLength,
current.valueLength, current.tagLength);
ByteBuffer kvBuffer = ByteBuffer.allocate(kvBufSize);
return kvBuffer;
}
@Override
public KeyValue getKeyValue() {
ByteBuffer kvBuf = getKeyValueBuffer();
KeyValue kv = new KeyValue(kvBuf.array(), kvBuf.arrayOffset());
KeyValue kv = new KeyValue(kvBuf.array(), kvBuf.arrayOffset(), kvBuf.array().length
- kvBuf.arrayOffset());
kv.setMvccVersion(current.memstoreTS);
return kv;
}
@ -188,6 +218,12 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
return true;
}
public void decodeTags() {
current.tagLength = ByteBufferUtils.readCompressedInt(currentBuffer);
current.tagOffset = currentBuffer.position();
ByteBufferUtils.skip(currentBuffer, current.tagLength);
}
@Override
public int seekToKeyInBlock(byte[] key, int offset, int length,
boolean seekBefore) {
@ -276,8 +312,13 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
}
protected final void afterEncodingKeyValue(ByteBuffer in,
DataOutputStream out, boolean includesMemstoreTS) {
if (includesMemstoreTS) {
DataOutputStream out, HFileBlockDefaultEncodingContext encodingCtx) throws IOException {
if (encodingCtx.getHFileContext().shouldIncludeTags()) {
int tagsLength = in.getShort();
ByteBufferUtils.putCompressedInt(out, tagsLength);
ByteBufferUtils.moveBufferToStream(out, in, tagsLength);
}
if (encodingCtx.getHFileContext().shouldIncludeMvcc()) {
// Copy memstore timestamp from the byte buffer to the output stream.
long memstoreTS = -1;
try {
@ -291,8 +332,13 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
}
protected final void afterDecodingKeyValue(DataInputStream source,
ByteBuffer dest, boolean includesMemstoreTS) {
if (includesMemstoreTS) {
ByteBuffer dest, HFileBlockDefaultDecodingContext decodingCtx) throws IOException {
if (decodingCtx.getHFileContext().shouldIncludeTags()) {
int tagsLength = ByteBufferUtils.readCompressedInt(source);
dest.putShort((short)tagsLength);
ByteBufferUtils.copyFromStreamToBuffer(dest, source, tagsLength);
}
if (decodingCtx.getHFileContext().shouldIncludeMvcc()) {
long memstoreTS = -1;
try {
// Copy memstore timestamp from the data input stream to the byte
@ -307,33 +353,32 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
}
@Override
public HFileBlockEncodingContext newDataBlockEncodingContext(
Algorithm compressionAlgorithm,
DataBlockEncoding encoding, byte[] header) {
return new HFileBlockDefaultEncodingContext(
compressionAlgorithm, encoding, header);
public HFileBlockEncodingContext newDataBlockEncodingContext(DataBlockEncoding encoding,
byte[] header, HFileContext meta) {
return new HFileBlockDefaultEncodingContext(encoding, header, meta);
}
@Override
public HFileBlockDecodingContext newDataBlockDecodingContext(
Algorithm compressionAlgorithm) {
return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
public HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta) {
return new HFileBlockDefaultDecodingContext(meta);
}
/**
* Compress KeyValues and write them to output buffer.
* @param out Where to write compressed data.
* @param in Source of KeyValue for compression.
* @param includesMemstoreTS true if including memstore timestamp after every
* key-value pair
* @param encodingCtx the encoding context associated with the current block
* @throws IOException If there is an error writing to output stream.
*/
public abstract void internalEncodeKeyValues(DataOutputStream out,
ByteBuffer in, boolean includesMemstoreTS) throws IOException;
ByteBuffer in, HFileBlockDefaultEncodingContext encodingCtx) throws IOException;
public abstract ByteBuffer internalDecodeKeyValues(DataInputStream source,
int allocateHeaderLength, int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx)
throws IOException;
@Override
public void encodeKeyValues(ByteBuffer in,
boolean includesMemstoreTS,
HFileBlockEncodingContext blkEncodingCtx) throws IOException {
if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
throw new IOException (this.getClass().getName() + " only accepts "
@ -347,7 +392,7 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder {
DataOutputStream dataOut =
((HFileBlockDefaultEncodingContext) encodingCtx)
.getOutputStreamForEncoder();
internalEncodeKeyValues(dataOut, in, includesMemstoreTS);
internalEncodeKeyValues(dataOut, in, encodingCtx);
if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
encodingCtx.postEncoding(BlockType.ENCODED_DATA);
} else {

View File

@ -34,24 +34,12 @@ import org.apache.hadoop.hbase.util.Bytes;
public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder {
@Override
public void internalEncodeKeyValues(DataOutputStream out,
ByteBuffer in, boolean includesMemstoreTS) throws IOException {
ByteBuffer in, HFileBlockDefaultEncodingContext encodingCtx) throws IOException {
in.rewind();
ByteBufferUtils.putInt(out, in.limit());
ByteBufferUtils.moveBufferToStream(out, in, in.limit());
}
@Override
public ByteBuffer decodeKeyValues(DataInputStream source,
int preserveHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
throws IOException {
int decompressedSize = source.readInt();
ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
preserveHeaderLength);
buffer.position(preserveHeaderLength);
ByteBufferUtils.copyFromStreamToBuffer(buffer, source, decompressedSize);
return buffer;
}
@Override
public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
@ -68,8 +56,8 @@ public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder {
@Override
public EncodedSeeker createSeeker(KVComparator comparator,
final boolean includesMemstoreTS) {
return new BufferedEncodedSeeker<SeekerState>(comparator) {
final HFileBlockDecodingContext decodingCtx) {
return new BufferedEncodedSeeker<SeekerState>(comparator, decodingCtx) {
@Override
protected void decodeNext() {
current.keyLength = currentBuffer.getInt();
@ -78,7 +66,11 @@ public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder {
currentBuffer.get(current.keyBuffer, 0, current.keyLength);
current.valueOffset = currentBuffer.position();
ByteBufferUtils.skip(currentBuffer, current.valueLength);
if (includesMemstoreTS) {
if (includesTags()) {
current.tagLength = currentBuffer.getShort();
ByteBufferUtils.skip(currentBuffer, current.tagLength);
}
if (includesMvcc()) {
current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
} else {
current.memstoreTS = 0;
@ -95,4 +87,16 @@ public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder {
};
}
@Override
public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength,
int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException {
int decompressedSize = source.readInt();
ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
allocateHeaderLength);
buffer.position(allocateHeaderLength);
ByteBufferUtils.copyFromStreamToBuffer(buffer, source, decompressedSize);
return buffer;
}
}

View File

@ -23,8 +23,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
/**
* Encoding of KeyValue. It aims to be fast and efficient using assumptions:
@ -38,7 +37,7 @@ import org.apache.hadoop.io.RawComparator;
*
* After encoding, it also optionally compresses the encoded data if a
* compression algorithm is specified in HFileBlockEncodingContext argument of
* {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}.
* {@link #encodeKeyValues(ByteBuffer, HFileBlockEncodingContext)}.
*/
@InterfaceAudience.Private
public interface DataBlockEncoder {
@ -49,44 +48,23 @@ public interface DataBlockEncoder {
*
* @param in
* Source of KeyValue for compression.
* @param includesMemstoreTS
* true if including memstore timestamp after every key-value pair
* @param encodingContext
* @param encodingCtx
* the encoding context which will contain encoded uncompressed bytes
* as well as compressed encoded bytes if compression is enabled, and
* also it will reuse resources across multiple calls.
* @throws IOException
* If there is an error writing to output stream.
*/
void encodeKeyValues(
ByteBuffer in, boolean includesMemstoreTS, HFileBlockEncodingContext encodingContext
) throws IOException;
void encodeKeyValues(ByteBuffer in, HFileBlockEncodingContext encodingCtx) throws IOException;
/**
* Decode.
* @param source Compressed stream of KeyValues.
* @param includesMemstoreTS true if including memstore timestamp after every
* key-value pair
* @param decodingCtx
* @return Uncompressed block of KeyValues.
* @throws IOException If there is an error in source.
*/
ByteBuffer decodeKeyValues(
DataInputStream source, boolean includesMemstoreTS
) throws IOException;
/**
* Uncompress.
* @param source encoded stream of KeyValues.
* @param allocateHeaderLength allocate this many bytes for the header.
* @param skipLastBytes Do not copy n last bytes.
* @param includesMemstoreTS true if including memstore timestamp after every
* key-value pair
* @return Uncompressed block of KeyValues.
* @throws IOException If there is an error in source.
*/
ByteBuffer decodeKeyValues(
DataInputStream source, int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS
)
ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
throws IOException;
/**
@ -102,42 +80,36 @@ public interface DataBlockEncoder {
/**
* Create a HFileBlock seeker which find KeyValues within a block.
* @param comparator what kind of comparison should be used
* @param includesMemstoreTS true if including memstore timestamp after every
* key-value pair
* @param decodingCtx
* @return A newly created seeker.
*/
EncodedSeeker createSeeker(
KVComparator comparator, boolean includesMemstoreTS
);
EncodedSeeker createSeeker(KVComparator comparator,
HFileBlockDecodingContext decodingCtx);
/**
* Creates an encoder-specific encoding context
*
* @param compressionAlgorithm
* compression algorithm used if the final data needs to be
* compressed
* @param encoding
* encoding strategy used
* @param headerBytes
* header bytes to be written, put a dummy header here if the header
* is unknown
* @param meta
* HFile meta data
* @return a newly created encoding context
*/
HFileBlockEncodingContext newDataBlockEncodingContext(
Algorithm compressionAlgorithm, DataBlockEncoding encoding, byte[] headerBytes
);
DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta);
/**
* Creates an encoder specific decoding context, which will prepare the data
* before actual decoding
*
* @param compressionAlgorithm
* compression algorithm used if the data needs to be decompressed
* @param meta
* HFile meta data
* @return a newly created decoding context
*/
HFileBlockDecodingContext newDataBlockDecodingContext(
Algorithm compressionAlgorithm
);
HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta);
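For illustration, a minimal sketch of how a caller might drive the reworked interface, assuming an existing DataBlockEncoder encoder, a ByteBuffer of raw KeyValues kvBuffer and an encoded DataInputStream source (all placeholder names, not from this patch); the memstore-TS and tag switches now travel inside the HFileContext rather than as boolean arguments:

HFileContext meta = new HFileContext();
meta.setIncludesMvcc(true);   // replaces the old includesMemstoreTS flag
meta.setIncludesTags(true);   // tags are new in this patch

HFileBlockEncodingContext encodingCtx = encoder.newDataBlockEncodingContext(
    DataBlockEncoding.DIFF, HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
encoder.encodeKeyValues(kvBuffer, encodingCtx);

HFileBlockDecodingContext decodingCtx = encoder.newDataBlockDecodingContext(meta);
ByteBuffer decoded = encoder.decodeKeyValues(source, decodingCtx);
DataBlockEncoder.EncodedSeeker seeker =
    encoder.createSeeker(KeyValue.COMPARATOR, decodingCtx);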
/**
* An interface which enables seeking while the underlying data is encoded.

View File

@ -318,7 +318,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
@Override
public void internalEncodeKeyValues(DataOutputStream out,
ByteBuffer in, boolean includesMemstoreTS) throws IOException {
ByteBuffer in, HFileBlockDefaultEncodingContext encodingCtx) throws IOException {
in.rewind();
ByteBufferUtils.putInt(out, in.limit());
DiffCompressionState previousState = new DiffCompressionState();
@ -326,7 +326,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
while (in.hasRemaining()) {
compressSingleKeyValue(previousState, currentState,
out, in);
afterEncodingKeyValue(in, out, includesMemstoreTS);
afterEncodingKeyValue(in, out, encodingCtx);
// swap previousState <-> currentState
DiffCompressionState tmp = previousState;
@ -335,26 +335,6 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
}
}
@Override
public ByteBuffer decodeKeyValues(DataInputStream source,
int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
throws IOException {
int decompressedSize = source.readInt();
ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
allocHeaderLength);
buffer.position(allocHeaderLength);
DiffCompressionState state = new DiffCompressionState();
while (source.available() > skipLastBytes) {
uncompressSingleKeyValue(source, buffer, state);
afterDecodingKeyValue(source, buffer, includesMemstoreTS);
}
if (source.available() != skipLastBytes) {
throw new IllegalStateException("Read too much bytes.");
}
return buffer;
}
@Override
public ByteBuffer getFirstKeyInBlock(ByteBuffer block) {
@ -424,8 +404,8 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
@Override
public EncodedSeeker createSeeker(KVComparator comparator,
final boolean includesMemstoreTS) {
return new BufferedEncodedSeeker<DiffSeekerState>(comparator) {
HFileBlockDecodingContext decodingCtx) {
return new BufferedEncodedSeeker<DiffSeekerState>(comparator, decodingCtx) {
private byte[] familyNameWithSize;
private static final int TIMESTAMP_WITH_TYPE_LENGTH =
Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE;
@ -517,7 +497,10 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
current.valueOffset = currentBuffer.position();
ByteBufferUtils.skip(currentBuffer, current.valueLength);
if (includesMemstoreTS) {
if (includesTags()) {
decodeTags();
}
if (includesMvcc()) {
current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
} else {
current.memstoreTS = 0;
@ -549,4 +532,24 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder {
}
};
}
@Override
public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength,
int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException {
int decompressedSize = source.readInt();
ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
allocateHeaderLength);
buffer.position(allocateHeaderLength);
DiffCompressionState state = new DiffCompressionState();
while (source.available() > skipLastBytes) {
uncompressSingleKeyValue(source, buffer, state);
afterDecodingKeyValue(source, buffer, decodingCtx);
}
if (source.available() != skipLastBytes) {
throw new IllegalStateException("Read too many bytes.");
}
return buffer;
}
}

View File

@ -29,8 +29,9 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.compress.Compressor;
@ -48,25 +49,26 @@ public class EncodedDataBlock {
private DataBlockEncoder dataBlockEncoder;
private byte[] cachedEncodedData;
private boolean includesMemstoreTS;
private final HFileBlockEncodingContext encodingCtx;
private HFileContext meta;
/**
* Create a buffer which will be encoded using dataBlockEncoder.
* @param dataBlockEncoder Algorithm used for compression.
* @param encoding encoding type used
* @param rawKVs
* @param meta
*/
public EncodedDataBlock(DataBlockEncoder dataBlockEncoder,
boolean includesMemstoreTS, DataBlockEncoding encoding, byte[] rawKVs) {
public EncodedDataBlock(DataBlockEncoder dataBlockEncoder, DataBlockEncoding encoding,
byte[] rawKVs, HFileContext meta) {
Preconditions.checkNotNull(encoding,
"Cannot create encoded data block with null encoder");
this.dataBlockEncoder = dataBlockEncoder;
encodingCtx =
dataBlockEncoder.newDataBlockEncodingContext(Compression.Algorithm.NONE,
encoding, HConstants.HFILEBLOCK_DUMMY_HEADER);
encodingCtx = dataBlockEncoder.newDataBlockEncodingContext(encoding,
HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
this.rawKVs = rawKVs;
this.meta = meta;
}
/**
@ -97,19 +99,30 @@ public class EncodedDataBlock {
public Cell next() {
if (decompressedData == null) {
try {
decompressedData = dataBlockEncoder.decodeKeyValues(
dis, includesMemstoreTS);
decompressedData = dataBlockEncoder.decodeKeyValues(dis, dataBlockEncoder
.newDataBlockDecodingContext(meta));
} catch (IOException e) {
throw new RuntimeException("Problem with data block encoder, " +
"most likely it requested more bytes than are available.", e);
}
decompressedData.rewind();
}
int offset = decompressedData.position();
KeyValue kv = new KeyValue(decompressedData.array(), offset);
decompressedData.position(offset + kv.getLength());
int klen = decompressedData.getInt();
int vlen = decompressedData.getInt();
short tagsLen = 0;
ByteBufferUtils.skip(decompressedData, klen + vlen);
// Read the tags length in case the stream contains tags
if (meta.shouldIncludeTags()) {
tagsLen = decompressedData.getShort();
ByteBufferUtils.skip(decompressedData, tagsLen);
}
KeyValue kv = new KeyValue(decompressedData.array(), offset,
(int) KeyValue.getKeyValueDataStructureSize(klen, vlen, tagsLen));
if (meta.shouldIncludeMvcc()) {
long mvccVersion = ByteBufferUtils.readVLong(decompressedData);
kv.setMvccVersion(mvccVersion);
}
return kv;
}
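For illustration, a minimal sketch (placeholder names, not from this patch) of the updated EncodedDataBlock construction, assuming a DataBlockEncoder encoder and a byte[] rawKVs of unencoded key values; the HFileContext replaces the old includesMemstoreTS flag:

HFileContext meta = new HFileContext();
meta.setIncludesMvcc(true);
meta.setIncludesTags(true);
EncodedDataBlock block =
    new EncodedDataBlock(encoder, DataBlockEncoding.PREFIX, rawKVs, meta);
byte[] encoded = block.encodeData();  // uses the encoding context built from 'meta'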
@ -199,7 +212,7 @@ public class EncodedDataBlock {
public byte[] encodeData() {
try {
this.dataBlockEncoder.encodeKeyValues(
getUncompressedBuffer(), includesMemstoreTS, encodingCtx);
getUncompressedBuffer(), encodingCtx);
} catch (IOException e) {
throw new RuntimeException(String.format(
"Bug in encoding part of algorithm %s. " +

View File

@ -343,8 +343,8 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
}
@Override
public void internalEncodeKeyValues(DataOutputStream out,
ByteBuffer in, boolean includesMemstoreTS) throws IOException {
public void internalEncodeKeyValues(DataOutputStream out, ByteBuffer in,
HFileBlockDefaultEncodingContext encodingCtx) throws IOException {
in.rewind();
ByteBufferUtils.putInt(out, in.limit());
FastDiffCompressionState previousState = new FastDiffCompressionState();
@ -352,7 +352,7 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
while (in.hasRemaining()) {
compressSingleKeyValue(previousState, currentState,
out, in);
afterEncodingKeyValue(in, out, includesMemstoreTS);
afterEncodingKeyValue(in, out, encodingCtx);
// swap previousState <-> currentState
FastDiffCompressionState tmp = previousState;
@ -362,17 +362,16 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
}
@Override
public ByteBuffer decodeKeyValues(DataInputStream source,
int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
throws IOException {
public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength,
int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException {
int decompressedSize = source.readInt();
ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
allocHeaderLength);
buffer.position(allocHeaderLength);
allocateHeaderLength);
buffer.position(allocateHeaderLength);
FastDiffCompressionState state = new FastDiffCompressionState();
while (source.available() > skipLastBytes) {
uncompressSingleKeyValue(source, buffer, state);
afterDecodingKeyValue(source, buffer, includesMemstoreTS);
afterDecodingKeyValue(source, buffer, decodingCtx);
}
if (source.available() != skipLastBytes) {
@ -419,8 +418,8 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
@Override
public EncodedSeeker createSeeker(KVComparator comparator,
final boolean includesMemstoreTS) {
return new BufferedEncodedSeeker<FastDiffSeekerState>(comparator) {
final HFileBlockDecodingContext decodingCtx) {
return new BufferedEncodedSeeker<FastDiffSeekerState>(comparator, decodingCtx) {
private void decode(boolean isFirst) {
byte flag = currentBuffer.get();
if ((flag & FLAG_SAME_KEY_LENGTH) == 0) {
@ -520,7 +519,10 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder {
ByteBufferUtils.skip(currentBuffer, current.valueLength);
}
if (includesMemstoreTS) {
if (includesTags()) {
decodeTags();
}
if (includesMvcc()) {
current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
} else {
current.memstoreTS = 0;

View File

@ -20,7 +20,7 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
/**
* A decoding context that is created by a reader's encoder, and is shared
@ -31,14 +31,9 @@ import org.apache.hadoop.hbase.io.compress.Compression;
@InterfaceAudience.Private
public interface HFileBlockDecodingContext {
/**
* @return the compression algorithm used by this decoding context
*/
Compression.Algorithm getCompression();
/**
* Perform all actions that need to be done before the encoder's real decoding process.
* Decompression needs to be done if {@link #getCompression()} returns a valid compression
* Decompression needs to be done if {@link HFileContext#getCompression()} returns a valid compression
* algorithm.
*
* @param onDiskSizeWithoutHeader numBytes after block and encoding headers
@ -57,4 +52,8 @@ public interface HFileBlockDecodingContext {
int offset
) throws IOException;
/**
* @return HFile meta information
*/
HFileContext getHFileContext();
}

View File

@ -24,7 +24,7 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
/**
* A default implementation of {@link HFileBlockDecodingContext}. It assumes the
@ -37,11 +37,10 @@ import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
public class HFileBlockDefaultDecodingContext implements
HFileBlockDecodingContext {
private final Compression.Algorithm compressAlgo;
public HFileBlockDefaultDecodingContext(
Compression.Algorithm compressAlgo) {
this.compressAlgo = compressAlgo;
private final HFileContext fileContext;
public HFileBlockDefaultDecodingContext(HFileContext fileContext) {
this.fileContext = fileContext;
}
@Override
@ -52,12 +51,11 @@ public class HFileBlockDefaultDecodingContext implements
Compression.decompress(blockBufferWithoutHeader.array(),
blockBufferWithoutHeader.arrayOffset(), (InputStream) dis, onDiskSizeWithoutHeader,
uncompressedSizeWithoutHeader, compressAlgo);
uncompressedSizeWithoutHeader, this.fileContext.getCompression());
}
@Override
public Algorithm getCompression() {
return compressAlgo;
public HFileContext getHFileContext() {
return this.fileContext;
}
}

View File

@ -24,8 +24,8 @@ import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.io.compress.Compressor;
@ -56,26 +56,25 @@ public class HFileBlockDefaultEncodingContext implements
/** Underlying stream to write compressed bytes to */
private ByteArrayOutputStream compressedByteStream;
/** Compression algorithm for all blocks this instance writes. */
private final Compression.Algorithm compressionAlgorithm;
private ByteArrayOutputStream encodedStream = new ByteArrayOutputStream();
private DataOutputStream dataOut = new DataOutputStream(encodedStream);
private byte[] dummyHeader;
private HFileContext fileContext;
/**
* @param compressionAlgorithm compression algorithm used
* @param encoding encoding used
* @param headerBytes dummy header bytes
* @param fileContext HFile meta data
*/
public HFileBlockDefaultEncodingContext(
Compression.Algorithm compressionAlgorithm,
DataBlockEncoding encoding, byte[] headerBytes) {
public HFileBlockDefaultEncodingContext(DataBlockEncoding encoding, byte[] headerBytes,
HFileContext fileContext) {
this.encodingAlgo = encoding;
this.compressionAlgorithm =
compressionAlgorithm == null ? NONE : compressionAlgorithm;
if (this.compressionAlgorithm != NONE) {
Compression.Algorithm compressionAlgorithm =
fileContext.getCompression() == null ? NONE : fileContext.getCompression();
this.fileContext = fileContext;
if (compressionAlgorithm != NONE) {
compressor = compressionAlgorithm.getCompressor();
compressedByteStream = new ByteArrayOutputStream();
try {
@ -137,7 +136,7 @@ public class HFileBlockDefaultEncodingContext implements
protected void compressAfterEncoding(byte[] uncompressedBytesWithHeader,
BlockType blockType, byte[] headerBytes) throws IOException {
this.uncompressedBytesWithHeader = uncompressedBytesWithHeader;
if (compressionAlgorithm != NONE) {
if (this.fileContext.getCompression() != NONE) {
compressedByteStream.reset();
compressedByteStream.write(headerBytes);
compressionStream.resetState();
@ -176,16 +175,11 @@ public class HFileBlockDefaultEncodingContext implements
@Override
public void close() {
if (compressor != null) {
compressionAlgorithm.returnCompressor(compressor);
this.fileContext.getCompression().returnCompressor(compressor);
compressor = null;
}
}
@Override
public Algorithm getCompression() {
return this.compressionAlgorithm;
}
public DataOutputStream getOutputStreamForEncoder() {
return this.dataOut;
}
@ -194,4 +188,9 @@ public class HFileBlockDefaultEncodingContext implements
public DataBlockEncoding getDataBlockEncoding() {
return this.encodingAlgo;
}
@Override
public HFileContext getHFileContext() {
return this.fileContext;
}
}
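For illustration, a minimal sketch of constructing the default encoding context under the new signature; the compression algorithm is now taken from the HFileContext rather than passed separately (the GZ choice below is only an example):

HFileContext fileContext = new HFileContext();
fileContext.setCompressAlgo(Compression.Algorithm.GZ);
HFileBlockEncodingContext ctx = new HFileBlockDefaultEncodingContext(
    DataBlockEncoding.NONE, HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
// compressAfterEncoding() now consults ctx.getHFileContext().getCompression()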

View File

@ -20,8 +20,8 @@ import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
/**
* An encoding context that is created by a writer's encoder, and is shared
@ -55,11 +55,6 @@ public interface HFileBlockEncodingContext {
*/
BlockType getBlockType();
/**
* @return the compression algorithm used by this encoding context
*/
Compression.Algorithm getCompression();
/**
* sets the dummy header bytes
*/
@ -72,8 +67,7 @@ public interface HFileBlockEncodingContext {
/**
* Do any action that needs to be performed after the encoding.
* Compression is also included if {@link #getCompression()} returns non-null
* compression algorithm
* Compression is also included if a non-null compression algorithm is used
*
* @param blockType
* @throws IOException
@ -85,4 +79,8 @@ public interface HFileBlockEncodingContext {
*/
void close();
/**
* @return HFile context information
*/
HFileContext getHFileContext();
}

View File

@ -76,8 +76,8 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
}
@Override
public void internalEncodeKeyValues(DataOutputStream writeHere,
ByteBuffer in, boolean includesMemstoreTS) throws IOException {
public void internalEncodeKeyValues(DataOutputStream writeHere, ByteBuffer in,
HFileBlockDefaultEncodingContext encodingCtx) throws IOException {
in.rewind();
ByteBufferUtils.putInt(writeHere, in.limit());
int prevOffset = -1;
@ -86,24 +86,23 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
while (in.hasRemaining()) {
offset = in.position();
keyLength = addKV(prevOffset, writeHere, in, keyLength);
afterEncodingKeyValue(in, writeHere, includesMemstoreTS);
afterEncodingKeyValue(in, writeHere, encodingCtx);
prevOffset = offset;
}
}
@Override
public ByteBuffer decodeKeyValues(DataInputStream source,
int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS)
throws IOException {
public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength,
int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException {
int decompressedSize = source.readInt();
ByteBuffer buffer = ByteBuffer.allocate(decompressedSize +
allocHeaderLength);
buffer.position(allocHeaderLength);
allocateHeaderLength);
buffer.position(allocateHeaderLength);
int prevKeyOffset = 0;
while (source.available() > skipLastBytes) {
prevKeyOffset = decodeKeyValue(source, buffer, prevKeyOffset);
afterDecodingKeyValue(source, buffer, includesMemstoreTS);
afterDecodingKeyValue(source, buffer, decodingCtx);
}
if (source.available() != skipLastBytes) {
@ -166,8 +165,8 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
@Override
public EncodedSeeker createSeeker(KVComparator comparator,
final boolean includesMemstoreTS) {
return new BufferedEncodedSeeker<SeekerState>(comparator) {
final HFileBlockDecodingContext decodingCtx) {
return new BufferedEncodedSeeker<SeekerState>(comparator, decodingCtx) {
@Override
protected void decodeNext() {
current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer);
@ -180,7 +179,10 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder {
current.keyLength - current.lastCommonPrefix);
current.valueOffset = currentBuffer.position();
ByteBufferUtils.skip(currentBuffer, current.valueLength);
if (includesMemstoreTS) {
if (includesTags()) {
decodeTags();
}
if (includesMvcc()) {
current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer);
} else {
current.memstoreTS = 0;

View File

@ -0,0 +1,174 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.ClassSize;
/**
* Carries metadata about an HFile that is shared across the HFile writers/readers
* and the HFileBlocks, which makes it easier to add new per-file information.
* This class is not meant to be immutable.
*/
@InterfaceAudience.Private
public class HFileContext implements HeapSize, Cloneable {
public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024;
public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32;
/** Whether checksum is enabled or not**/
private boolean usesHBaseChecksum = true;
/** Whether mvcc is to be included in the Read/Write**/
private boolean includesMvcc = true;
/**Whether tags are to be included in the Read/Write**/
private boolean includesTags;
/**Compression algorithm used**/
private Algorithm compressAlgo = Algorithm.NONE;
/** Whether tags to be compressed or not**/
private boolean compressTags;
/** the checksum type **/
private ChecksumType checksumType = DEFAULT_CHECKSUM_TYPE;
/** the number of bytes per checksum value **/
private int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
/** Number of uncompressed bytes we allow per block. */
private int blocksize = HConstants.DEFAULT_BLOCKSIZE;
private DataBlockEncoding encodingOnDisk = DataBlockEncoding.NONE;
private DataBlockEncoding encodingInCache = DataBlockEncoding.NONE;
//Empty constructor. Go with setters
public HFileContext() {
}
public Algorithm getCompression() {
return compressAlgo;
}
public void setCompressAlgo(Algorithm compressAlgo) {
this.compressAlgo = compressAlgo;
}
public boolean shouldUseHBaseChecksum() {
return usesHBaseChecksum;
}
public void setUsesHBaseChecksum(boolean usesHBaseChecksum) {
this.usesHBaseChecksum = usesHBaseChecksum;
}
public boolean shouldIncludeMvcc() {
return includesMvcc;
}
public void setIncludesMvcc(boolean includesMvcc) {
this.includesMvcc = includesMvcc;
}
public boolean shouldIncludeTags() {
return includesTags;
}
public void setIncludesTags(boolean includesTags) {
this.includesTags = includesTags;
}
public boolean shouldCompressTags() {
return compressTags;
}
public void setCompressTags(boolean compressTags) {
this.compressTags = compressTags;
}
public ChecksumType getChecksumType() {
return checksumType;
}
public void setChecksumType(ChecksumType checksumType) {
this.checksumType = checksumType;
}
public int getBytesPerChecksum() {
return bytesPerChecksum;
}
public void setBytesPerChecksum(int bytesPerChecksum) {
this.bytesPerChecksum = bytesPerChecksum;
}
public int getBlocksize() {
return blocksize;
}
public void setBlocksize(int blocksize) {
this.blocksize = blocksize;
}
public DataBlockEncoding getEncodingOnDisk() {
return encodingOnDisk;
}
public void setEncodingOnDisk(DataBlockEncoding encodingOnDisk) {
this.encodingOnDisk = encodingOnDisk;
}
public DataBlockEncoding getEncodingInCache() {
return encodingInCache;
}
public void setEncodingInCache(DataBlockEncoding encodingInCache) {
this.encodingInCache = encodingInCache;
}
/**
* HeapSize implementation
* NOTE: the heap size should be adjusted whenever new state variables are added
* @return heap size of the HFileContext
*/
@Override
public long heapSize() {
long size = ClassSize.align(ClassSize.OBJECT +
// Algorithm reference, encodingondisk, encodingincache, checksumtype
4 * ClassSize.REFERENCE +
2 * Bytes.SIZEOF_INT +
// usesHBaseChecksum, includesMvcc, includesTags and compressTags
4 * Bytes.SIZEOF_BOOLEAN);
return size;
}
@Override
public HFileContext clone() {
HFileContext clonedCtx = new HFileContext();
clonedCtx.usesHBaseChecksum = this.usesHBaseChecksum;
clonedCtx.includesMvcc = this.includesMvcc;
clonedCtx.includesTags = this.includesTags;
clonedCtx.compressAlgo = this.compressAlgo;
clonedCtx.compressTags = this.compressTags;
clonedCtx.checksumType = this.checksumType;
clonedCtx.bytesPerChecksum = this.bytesPerChecksum;
clonedCtx.blocksize = this.blocksize;
clonedCtx.encodingOnDisk = this.encodingOnDisk;
clonedCtx.encodingInCache = this.encodingInCache;
return clonedCtx;
}
}
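For illustration, a minimal sketch of assembling an HFileContext with the setters above before handing it to the encoding layer; every value below is an arbitrary example, not a recommended default:

HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(true);
meta.setIncludesTags(true);                 // turn on the tag read/write path
meta.setCompressAlgo(Algorithm.NONE);
meta.setChecksumType(ChecksumType.CRC32);
meta.setBytesPerChecksum(HFileContext.DEFAULT_BYTES_PER_CHECKSUM);
meta.setBlocksize(64 * 1024);
meta.setEncodingOnDisk(DataBlockEncoding.FAST_DIFF);
meta.setEncodingInCache(DataBlockEncoding.FAST_DIFF);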

View File

@ -0,0 +1,97 @@
/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.lang.ClassNotFoundException;
import java.util.zip.Checksum;
import java.lang.reflect.Constructor;
/**
* Utility class that is used to generate a Checksum object.
* The Checksum implementation is pluggable and an application
* can specify its own class that implements its own
* Checksum algorithm.
*/
public class ChecksumFactory {
static private final Class<?>[] EMPTY_ARRAY = new Class[]{};
/**
* Create a new instance of a Checksum object.
* @param className fully qualified name of the Checksum implementation to instantiate
* @return The newly created Checksum object
*/
static public Checksum newInstance(String className) throws IOException {
try {
Class<?> clazz = getClassByName(className);
return (Checksum)newInstance(clazz);
} catch (ClassNotFoundException e) {
throw new IOException(e);
}
}
/**
* Returns a Constructor that can be used to create a Checksum object.
* @param className class name for which a constructor is created
* @return a new Constructor object
*/
static public Constructor<?> newConstructor(String className)
throws IOException {
try {
Class<?> clazz = getClassByName(className);
Constructor<?> ctor = clazz.getDeclaredConstructor(EMPTY_ARRAY);
ctor.setAccessible(true);
return ctor;
} catch (ClassNotFoundException e) {
throw new IOException(e);
} catch (java.lang.NoSuchMethodException e) {
throw new IOException(e);
}
}
/** Create an object of the given class using its no-argument constructor.
*
* @param theClass class of which an object is created
* @return a new object
*/
static private <T> T newInstance(Class<T> theClass) {
T result;
try {
Constructor<T> ctor = theClass.getDeclaredConstructor(EMPTY_ARRAY);
ctor.setAccessible(true);
result = ctor.newInstance();
} catch (Exception e) {
throw new RuntimeException(e);
}
return result;
}
/**
* Load a class by name.
* @param name the class name.
* @return the class object.
* @throws ClassNotFoundException if the class is not found.
*/
static private Class<?> getClassByName(String name)
throws ClassNotFoundException {
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
return Class.forName(name, true, classLoader);
}
}
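For illustration, a short usage sketch (exception handling omitted); the JDK class name below is the fallback implementation referenced by ChecksumType:

Checksum crc = ChecksumFactory.newInstance("java.util.zip.CRC32");  // throws IOException on failure
byte[] data = Bytes.toBytes("hfile block bytes");                   // example payload only
crc.update(data, 0, data.length);
long checksumValue = crc.getValue();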

View File

@ -0,0 +1,180 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.zip.Checksum;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Checksum types. The Checksum type is a one-byte number
* that identifies the checksum algorithm
* used to encode an HFile. These byte codes cannot
* change or else you risk breaking all existing HFiles out there.
*/
public enum ChecksumType {
NULL((byte)0) {
@Override
public String getName() {
return "NULL";
}
@Override
public void initialize() {
// do nothing
}
@Override
public Checksum getChecksumObject() throws IOException {
return null; // checksums not used
}
},
CRC32((byte)1) {
private volatile Constructor<?> ctor;
@Override
public String getName() {
return "CRC32";
}
@Override
public void initialize() {
final String PURECRC32 = "org.apache.hadoop.util.PureJavaCrc32";
final String JDKCRC = "java.util.zip.CRC32";
LOG = LogFactory.getLog(ChecksumType.class);
// check if hadoop library is available
try {
ctor = ChecksumFactory.newConstructor(PURECRC32);
LOG.info("Checksum using " + PURECRC32);
} catch (Exception e) {
LOG.trace(PURECRC32 + " not available.");
}
try {
// The default checksum class name is java.util.zip.CRC32.
// This is available on all JVMs.
if (ctor == null) {
ctor = ChecksumFactory.newConstructor(JDKCRC);
LOG.info("Checksum can use " + JDKCRC);
}
} catch (Exception e) {
LOG.trace(JDKCRC + " not available.");
}
}
@Override
public Checksum getChecksumObject() throws IOException {
if (ctor == null) {
throw new IOException("Bad constructor for " + getName());
}
try {
return (Checksum)ctor.newInstance();
} catch (Exception e) {
throw new IOException(e);
}
}
},
CRC32C((byte)2) {
private transient Constructor<?> ctor;
@Override
public String getName() {
return "CRC32C";
}
@Override
public void initialize() {
final String PURECRC32C = "org.apache.hadoop.util.PureJavaCrc32C";
LOG = LogFactory.getLog(ChecksumType.class);
try {
ctor = ChecksumFactory.newConstructor(PURECRC32C);
LOG.info("Checksum can use " + PURECRC32C);
} catch (Exception e) {
LOG.trace(PURECRC32C + " not available.");
}
}
@Override
public Checksum getChecksumObject() throws IOException {
if (ctor == null) {
throw new IOException("Bad constructor for " + getName());
}
try {
return (Checksum)ctor.newInstance();
} catch (Exception e) {
throw new IOException(e);
}
}
};
private final byte code;
protected Log LOG;
/** initializes the relevant checksum class object */
abstract void initialize();
/** returns the name of this checksum type */
public abstract String getName();
private ChecksumType(final byte c) {
this.code = c;
initialize();
}
/** returns a object that can be used to generate/validate checksums */
public abstract Checksum getChecksumObject() throws IOException;
public byte getCode() {
return this.code;
}
/**
* Cannot rely on enum ordinals. They change if an item is removed or moved.
* Do our own codes.
* @param b the code byte
* @return Type associated with the passed code.
*/
public static ChecksumType codeToType(final byte b) {
for (ChecksumType t : ChecksumType.values()) {
if (t.getCode() == b) {
return t;
}
}
throw new RuntimeException("Unknown checksum type code " + b);
}
/**
* Map a checksum name to a specific type.
* Do our own names.
* @param name the checksum name
* @return Type associated with the passed name.
*/
public static ChecksumType nameToType(final String name) {
for (ChecksumType t : ChecksumType.values()) {
if (t.getName().equals(name)) {
return t;
}
}
throw new RuntimeException("Unknown checksum type name " + name);
}
}
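For illustration, a short sketch of the intended round trip between names, byte codes and Checksum objects (exception handling omitted):

ChecksumType type = ChecksumType.nameToType("CRC32");
byte code = type.getCode();                       // this code is what gets persisted
assert type == ChecksumType.codeToType(code);     // codes, not ordinals, round-trip
Checksum checksum = type.getChecksumObject();     // may throw IOException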

View File

@ -26,6 +26,7 @@ import java.util.Random;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.io.WritableUtils;
@ -200,6 +201,14 @@ public class RedundantKVGenerator {
* @return sorted list of key values
*/
public List<KeyValue> generateTestKeyValues(int howMany) {
return generateTestKeyValues(howMany, false);
}
/**
* Generate test data useful to test encoders.
* @param howMany How many Key values should be generated.
* @param useTags if true, attach a tag to each generated KeyValue
* @return sorted list of key values
*/
public List<KeyValue> generateTestKeyValues(int howMany, boolean useTags) {
List<KeyValue> result = new ArrayList<KeyValue>();
List<byte[]> rows = generateRows();
@ -267,7 +276,12 @@ public class RedundantKVGenerator {
randomizer.nextBytes(value);
}
result.add(new KeyValue(row, family, qualifier, timestamp, value));
if (useTags) {
result.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] { new Tag(
(byte) 1, "value1") }));
} else {
result.add(new KeyValue(row, family, qualifier, timestamp, value));
}
}
Collections.sort(result, KeyValue.COMPARATOR);
@ -297,7 +311,6 @@ public class RedundantKVGenerator {
ByteBufferUtils.writeVLong(result, kv.getMvccVersion());
}
}
return result;
}
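For illustration, a short sketch of the new overload; passing true attaches one Tag to every generated KeyValue, as in the loop above:

RedundantKVGenerator generator = new RedundantKVGenerator();
List<KeyValue> plainKVs  = generator.generateTestKeyValues(100);        // no tags
List<KeyValue> taggedKVs = generator.generateTestKeyValues(100, true);  // one tag per KeyValue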

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.hbase;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
@ -526,4 +528,63 @@ public class TestKeyValue extends TestCase {
Bytes.equals(newKey, KeyValue.ROW_LENGTH_SIZE, newRowLength, expectedArray, 0,
expectedArray.length);
}
public void testKVsWithTags() {
byte[] row = Bytes.toBytes("myRow");
byte[] cf = Bytes.toBytes("myCF");
byte[] q = Bytes.toBytes("myQualifier");
byte[] value = Bytes.toBytes("myValue");
byte[] metaValue1 = Bytes.toBytes("metaValue1");
byte[] metaValue2 = Bytes.toBytes("metaValue2");
KeyValue kv = new KeyValue(row, cf, q, HConstants.LATEST_TIMESTAMP, value, new Tag[] {
new Tag((byte) 1, metaValue1), new Tag((byte) 2, metaValue2) });
assertTrue(kv.getTagsLength() > 0);
assertTrue(Bytes.equals(kv.getRow(), row));
assertTrue(Bytes.equals(kv.getFamily(), cf));
assertTrue(Bytes.equals(kv.getQualifier(), q));
assertTrue(Bytes.equals(kv.getValue(), value));
List<Tag> tags = kv.getTags();
assertNotNull(tags);
assertEquals(2, tags.size());
boolean meta1Ok = false, meta2Ok = false;
for (Tag tag : tags) {
if (tag.getType() == (byte) 1) {
if (Bytes.equals(tag.getValue(), metaValue1)) {
meta1Ok = true;
}
} else {
if (Bytes.equals(tag.getValue(), metaValue2)) {
meta2Ok = true;
}
}
}
assertTrue(meta1Ok);
assertTrue(meta2Ok);
Iterator<Tag> tagItr = kv.tagsIterator();
assertTrue(tagItr.hasNext());
Tag next = tagItr.next();
assertEquals(10, next.getTagLength());
assertEquals((byte) 1, next.getType());
assertTrue(Bytes.equals(next.getValue(), metaValue1));
assertTrue(tagItr.hasNext());
next = tagItr.next();
assertEquals(10, next.getTagLength());
assertEquals((byte) 2, next.getType());
assertTrue(Bytes.equals(next.getValue(), metaValue2));
assertFalse(tagItr.hasNext());
tagItr = kv.tagsIterator();
assertTrue(tagItr.hasNext());
next = tagItr.next();
assertEquals(10, next.getTagLength());
assertEquals((byte) 1, next.getType());
assertTrue(Bytes.equals(next.getValue(), metaValue1));
assertTrue(tagItr.hasNext());
next = tagItr.next();
assertEquals(10, next.getTagLength());
assertEquals((byte) 2, next.getType());
assertTrue(Bytes.equals(next.getValue(), metaValue2));
assertFalse(tagItr.hasNext());
}
}

View File

@ -27,8 +27,10 @@ import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.codec.CellCodec;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.util.Bytes;
@ -122,4 +124,47 @@ public class TestCellCodec {
dis.close();
assertEquals(offset, cis.getCount());
}
}
@Test
public void testThreeWithTag() throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
CountingOutputStream cos = new CountingOutputStream(baos);
DataOutputStream dos = new DataOutputStream(cos);
Codec codec = new CellCodec();
Codec.Encoder encoder = codec.getEncoder(dos);
final KeyValue kv1 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("1"),
HConstants.LATEST_TIMESTAMP, Bytes.toBytes("1"), new Tag[] {
new Tag((byte) 1, Bytes.toBytes("teststring1")),
new Tag((byte) 2, Bytes.toBytes("testString2")) });
final KeyValue kv2 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("2"),
HConstants.LATEST_TIMESTAMP, Bytes.toBytes("2"), new Tag[] { new Tag((byte) 1,
Bytes.toBytes("teststring3")), });
final KeyValue kv3 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("3"),
HConstants.LATEST_TIMESTAMP, Bytes.toBytes("3"), new Tag[] {
new Tag((byte) 2, Bytes.toBytes("teststring4")),
new Tag((byte) 2, Bytes.toBytes("teststring5")),
new Tag((byte) 1, Bytes.toBytes("teststring6")) });
encoder.write(kv1);
encoder.write(kv2);
encoder.write(kv3);
encoder.flush();
dos.close();
long offset = cos.getCount();
CountingInputStream cis = new CountingInputStream(new ByteArrayInputStream(baos.toByteArray()));
DataInputStream dis = new DataInputStream(cis);
Codec.Decoder decoder = codec.getDecoder(dis);
assertTrue(decoder.advance());
Cell c = decoder.current();
assertTrue(CellComparator.equals(c, kv1));
assertTrue(decoder.advance());
c = decoder.current();
assertTrue(CellComparator.equals(c, kv2));
assertTrue(decoder.advance());
c = decoder.current();
assertTrue(CellComparator.equals(c, kv3));
assertFalse(decoder.advance());
dis.close();
assertEquals(offset, cis.getCount());
}
}

View File

@ -47,7 +47,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
protected static final Log LOG = LogFactory.getLog(IntegrationTestIngest.class);
protected IntegrationTestingUtility util;
protected HBaseCluster cluster;
private LoadTestTool loadTool;
protected LoadTestTool loadTool;
protected void setUp(int numSlavesBase) throws Exception {
util = getTestingUtil(null);
@ -84,7 +84,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
@Test
public void internalRunIngestTest() throws Exception {
runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 1024, 10);
runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 1024, 10, false, 10);
}
@Override
@ -104,7 +104,7 @@ public class IntegrationTestIngest extends IntegrationTestBase {
}
protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter,
int colsPerKey, int recordSize, int writeThreads) throws Exception {
int colsPerKey, int recordSize, int writeThreads, boolean useTags, int maxTagsPerKey) throws Exception {
LOG.info("Running ingest");
LOG.info("Cluster size:" + util.getHBaseClusterInterface().getClusterStatus().getServersSize());
@ -118,39 +118,46 @@ public class IntegrationTestIngest extends IntegrationTestBase {
LOG.info("Intended run time: " + (runtime/60000) + " min, left:" +
((runtime - (System.currentTimeMillis() - start))/60000) + " min");
int ret = loadTool.run(new String[] {
"-tn", getTablename(),
"-write", String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads),
"-start_key", String.valueOf(startKey),
"-num_keys", String.valueOf(numKeys),
"-skip_init"
});
int ret = -1;
if (useTags) {
ret = loadTool.run(new String[] { "-tn", getTablename(), "-write",
String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), "-start_key",
String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init",
"-usetags", "-num_tags", String.format("1:%d", maxTagsPerKey) });
} else {
ret = loadTool.run(new String[] { "-tn", getTablename(), "-write",
String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), "-start_key",
String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init" });
}
if (0 != ret) {
String errorMsg = "Load failed with error code " + ret;
LOG.error(errorMsg);
Assert.fail(errorMsg);
}
ret = loadTool.run(new String[] {
"-tn", getTablename(),
"-update", String.format("60:%d", writeThreads),
"-start_key", String.valueOf(startKey),
"-num_keys", String.valueOf(numKeys),
"-skip_init"
});
if (useTags) {
ret = loadTool.run(new String[] { "-tn", getTablename(), "-update",
String.format("60:%d", writeThreads), "-start_key", String.valueOf(startKey),
"-num_keys", String.valueOf(numKeys), "-skip_init", "-usetags", "-num_tags",
String.format("1:%d", maxTagsPerKey) });
} else {
ret = loadTool.run(new String[] { "-tn", getTablename(), "-update",
String.format("60:%d", writeThreads), "-start_key", String.valueOf(startKey),
"-num_keys", String.valueOf(numKeys), "-skip_init" });
}
if (0 != ret) {
String errorMsg = "Update failed with error code " + ret;
LOG.error(errorMsg);
Assert.fail(errorMsg);
}
ret = loadTool.run(new String[] {
"-tn", getTablename(),
"-read", "100:20",
"-start_key", String.valueOf(startKey),
"-num_keys", String.valueOf(numKeys),
"-skip_init"
});
if (useTags) {
ret = loadTool.run(new String[] { "-tn", getTablename(), "-read", "100:20", "-start_key",
String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init",
"-usetags", "-num_tags", String.format("1:%d", maxTagsPerKey) });
} else {
ret = loadTool.run(new String[] { "-tn", getTablename(), "-read", "100:20", "-start_key",
String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init" });
}
if (0 != ret) {
String errorMsg = "Verification failed with error code " + ret;
LOG.error(errorMsg);

View File

@ -0,0 +1,38 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import org.junit.Before;
import org.junit.experimental.categories.Category;
@Category(IntegrationTests.class)
public class IntegrationTestIngestWithTags extends IntegrationTestIngest {
@Before
@Override
public void setUp() throws Exception {
getTestingUtil(conf).getConfiguration().setInt("hfile.format.version", 3);
super.setUp();
}
@Override
protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, int colsPerKey,
int recordSize, int writeThreads, boolean useTags, int maxTagsPerKey) throws Exception {
super.runIngestTest(defaultRunTime, keysPerServerPerIter, colsPerKey, recordSize, writeThreads,
true, 10);
}
}
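For illustration, a sketch of the extra LoadTestTool arguments a tagged run adds (the table name and key count are placeholders); "-usetags" switches tag generation on and "-num_tags" bounds the number of tags per key:

int ret = loadTool.run(new String[] {
    "-tn", "test_table",            // placeholder table name
    "-write", "10:1024:10",         // colsPerKey:recordSize:writeThreads
    "-num_keys", "1000000",         // placeholder key count
    "-skip_init",
    "-usetags",
    "-num_tags", "1:10" });         // between 1 and 10 tags per key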

View File

@ -230,7 +230,8 @@ public class IntegrationTestLazyCfLoading {
writer.setMultiPut(true);
LOG.info("Starting writer; the number of keys to write is " + keysToWrite);
writer.start(1, keysToWrite, WRITER_THREADS);
// TODO : Need to see if tag support has to be given here in the integration test suite
writer.start(1, keysToWrite, WRITER_THREADS, false, 0, 0);
// Now, do scans.
long now = EnvironmentEdgeManager.currentTimeMillis();

View File

@ -34,7 +34,6 @@ import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher;
import org.apache.hadoop.hbase.codec.prefixtree.encode.EncoderFactory;
import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
@ -42,8 +41,8 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.io.RawComparator;
/**
* This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or
@ -69,7 +68,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{
* enough with the concept of the HFileBlockEncodingContext.
*/
@Override
public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion,
public void encodeKeyValues(ByteBuffer in,
HFileBlockEncodingContext blkEncodingCtx) throws IOException {
if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) {
throw new IOException(this.getClass().getName() + " only accepts "
@ -80,7 +79,8 @@ public class PrefixTreeCodec implements DataBlockEncoder{
= (HFileBlockDefaultEncodingContext) blkEncodingCtx;
encodingCtx.prepareEncoding();
DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder();
internalEncodeKeyValues(dataOut, in, includesMvccVersion);
internalEncodeKeyValues(dataOut, in, encodingCtx.getHFileContext().shouldIncludeMvcc(),
encodingCtx.getHFileContext().shouldIncludeTags());
//do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE?
if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) {
@ -91,26 +91,26 @@ public class PrefixTreeCodec implements DataBlockEncoder{
}
private void internalEncodeKeyValues(DataOutputStream encodedOutputStream,
ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException {
ByteBuffer rawKeyValues, boolean includesMvccVersion, boolean includesTag) throws IOException {
rawKeyValues.rewind();
PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion);
try{
try {
KeyValue kv;
while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) {
while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion, includesTag)) != null) {
builder.write(kv);
}
builder.flush();
}finally{
} finally {
EncoderFactory.checkIn(builder);
}
}
@Override
public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion)
public ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx)
throws IOException {
return decodeKeyValues(source, 0, 0, includesMvccVersion);
return decodeKeyValues(source, 0, 0, decodingCtx);
}
@ -118,9 +118,8 @@ public class PrefixTreeCodec implements DataBlockEncoder{
* I don't think this method is called during normal HBase operation, so efficiency is not
* important.
*/
@Override
public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength,
int skipLastBytes, boolean includesMvccVersion) throws IOException {
int skipLastBytes, HFileBlockDecodingContext decodingCtx) throws IOException {
ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste
sourceAsBuffer.mark();
PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer);
@ -131,17 +130,19 @@ public class PrefixTreeCodec implements DataBlockEncoder{
result.rewind();
CellSearcher searcher = null;
try {
searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion);
boolean includesMvcc = decodingCtx.getHFileContext().shouldIncludeMvcc();
searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvcc);
while (searcher.advance()) {
KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current());
// needs to be modified for DirectByteBuffers. no existing methods to
// write VLongs to byte[]
int offset = result.arrayOffset() + result.position();
KeyValueUtil.appendToByteArray(currentCell, result.array(), offset);
System.arraycopy(currentCell.getBuffer(), currentCell.getOffset(), result.array(), offset,
currentCell.getLength());
int keyValueLength = KeyValueUtil.length(currentCell);
ByteBufferUtils.skip(result, keyValueLength);
offset += keyValueLength;
if (includesMvccVersion) {
if (includesMvcc) {
ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion());
}
}
@ -158,7 +159,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{
block.rewind();
PrefixTreeArraySearcher searcher = null;
try {
//should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will
// should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will
searcher = DecoderFactory.checkOut(block, true);
if (!searcher.positionAtFirstCell()) {
return null;
@ -170,19 +171,19 @@ public class PrefixTreeCodec implements DataBlockEncoder{
}
@Override
public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm,
DataBlockEncoding encoding, byte[] header) {
public HFileBlockEncodingContext newDataBlockEncodingContext(
DataBlockEncoding encoding, byte[] header, HFileContext meta) {
if(DataBlockEncoding.PREFIX_TREE != encoding){
//i'm not sure why encoding is in the interface. Each encoder implementation should probably
//know its encoding type
throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported");
}
return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header);
return new HFileBlockDefaultEncodingContext(encoding, header, meta);
}
@Override
public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) {
return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
public HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta) {
return new HFileBlockDefaultDecodingContext(meta);
}
/**
@ -190,7 +191,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{
* the way to this point.
*/
@Override
public EncodedSeeker createSeeker(KVComparator comparator, boolean includesMvccVersion) {
public EncodedSeeker createSeeker(KVComparator comparator, HFileBlockDecodingContext decodingCtx) {
if (comparator instanceof RawBytesComparator){
throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator");
} else if (comparator instanceof MetaComparator){
@ -198,7 +199,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{
+"table");
}
return new PrefixTreeSeeker(includesMvccVersion);
return new PrefixTreeSeeker(decodingCtx.getHFileContext().shouldIncludeMvcc());
}
}
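For illustration, a minimal sketch of obtaining a prefix-tree seeker under the new signature; the MVCC flag now comes from the HFileContext carried by the decoding context instead of a boolean argument:

DataBlockEncoder codec = new PrefixTreeCodec();
HFileContext meta = new HFileContext();
meta.setIncludesMvcc(true);
HFileBlockDecodingContext decodingCtx = codec.newDataBlockDecodingContext(meta);
DataBlockEncoder.EncodedSeeker seeker =
    codec.createSeeker(KeyValue.COMPARATOR, decodingCtx);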

View File

@ -59,13 +59,13 @@ public class DecoderFactory {
/**************************** helper ******************************/
public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer,
PrefixTreeArraySearcher searcher, boolean includeMvccVersion) {
if (searcher == null) {
PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer);
searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(),
blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength());
blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength(),
blockMeta.getMaxTagsLength());
searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);
return searcher;
}
@ -78,8 +78,9 @@ public class DecoderFactory {
int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength());
int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(),
searcher.getQualifierBufferLength());
int tagBufferLength = Math.max(blockMeta.getMaxTagsLength(), searcher.getTagBufferLength());
searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength,
qualifierBufferLength);
qualifierBufferLength, tagBufferLength);
}
//this is where we parse the BlockMeta
searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion);

View File

@ -33,8 +33,8 @@ public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner imp
/***************** construct ******************************/
public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
int rowBufferLength, int qualifierBufferLength) {
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
}

View File

@ -27,6 +27,8 @@ import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hadoop.hbase.codec.prefixtree.decode.row.RowNodeReader;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.util.Bytes;
/**
* Extends PtCell and manipulates its protected fields. Could alternatively contain a PtCell and
@ -53,6 +55,7 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
protected RowNodeReader currentRowNode;
protected ColumnReader familyReader;
protected ColumnReader qualifierReader;
protected ColumnReader tagsReader;
protected TimestampDecoder timestampDecoder;
protected MvccVersionDecoder mvccVersionDecoder;
@ -63,17 +66,19 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
/*********************** construct ******************************/
// pass in blockMeta so we can initialize buffers big enough for all cells in the block
public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
int rowBufferLength, int qualifierBufferLength) {
public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
this.rowNodes = new RowNodeReader[rowTreeDepth];
for (int i = 0; i < rowNodes.length; ++i) {
rowNodes[i] = new RowNodeReader();
}
this.rowBuffer = new byte[rowBufferLength];
this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH];
this.familyReader = new ColumnReader(familyBuffer, true);
this.familyReader = new ColumnReader(familyBuffer, ColumnNodeType.FAMILY);
this.qualifierBuffer = new byte[qualifierBufferLength];
this.qualifierReader = new ColumnReader(qualifierBuffer, false);
this.tagsBuffer = new byte[tagsBufferLength];
this.qualifierReader = new ColumnReader(qualifierBuffer, ColumnNodeType.QUALIFIER);
this.tagsReader = new ColumnReader(tagsBuffer, ColumnNodeType.TAGS);
this.timestampDecoder = new TimestampDecoder();
this.mvccVersionDecoder = new MvccVersionDecoder();
}
@ -95,6 +100,9 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) {
return false;
}
if(tagsBuffer.length < blockMeta.getMaxTagsLength()) {
return false;
}
return true;
}
@ -106,6 +114,8 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
this.familyReader.initOnBlock(blockMeta, block);
this.qualifierOffset = qualifierBuffer.length;
this.qualifierReader.initOnBlock(blockMeta, block);
this.tagsOffset = tagsBuffer.length;
this.tagsReader.initOnBlock(blockMeta, block);
this.timestampDecoder.initOnBlock(blockMeta, block);
this.mvccVersionDecoder.initOnBlock(blockMeta, block);
this.includeMvccVersion = includeMvccVersion;
@ -129,6 +139,8 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
type = DEFAULT_TYPE;
absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized
valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length
tagsOffset = blockMeta.getMaxTagsLength();
tagsLength = 0;
}
/**
@ -427,6 +439,10 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
currentCellIndex = cellIndex;
populateFamily();
populateQualifier();
// Read tags only if there are tags in the meta
if(blockMeta.getNumTagsBytes() != 0) {
populateTag();
}
populateTimestamp();
populateMvccVersion();
populateType();
@ -445,6 +461,12 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
qualifierLength = qualifierReader.getColumnLength();
}
protected void populateTag() {
int tagTreeIndex = currentRowNode.getTagOffset(currentCellIndex, blockMeta);
tagsOffset = tagsReader.populateBuffer(tagTreeIndex).getColumnOffset();
tagsLength = (short)tagsReader.getColumnLength();
}
protected void populateTimestamp() {
if (blockMeta.isAllSameTimestamp()) {
timestamp = blockMeta.getMinTimestamp();
@ -480,7 +502,6 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta);
}
/**************** getters ***************************/
public byte[] getTreeBytes() {
@ -503,4 +524,8 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne
return qualifierBuffer.length;
}
public int getTagBufferLength() {
return tagsBuffer.length;
}
}

View File

@ -48,8 +48,8 @@ public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner im
/*************** construct ******************************/
public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth,
int rowBufferLength, int qualifierBufferLength) {
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength);
int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) {
super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength);
}
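
// Editorial sketch, not part of this commit: sizing a searcher from an already-parsed block
// meta, tags buffer included. It uses only the five-argument constructor above and the
// PrefixTreeBlockMeta getters that appear elsewhere in this patch; blockMeta, blockBytes and
// includeMvccVersion are hypothetical inputs. The scanner only populates tags when
// blockMeta.getNumTagsBytes() != 0, the same check used in the scanner code above.
PrefixTreeArraySearcher searcher = new PrefixTreeArraySearcher(blockMeta,
    blockMeta.getRowTreeDepth(), blockMeta.getMaxRowLength(),
    blockMeta.getMaxQualifierLength(), blockMeta.getMaxTagsLength());
searcher.initOnBlock(blockMeta, blockBytes, includeMvccVersion);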

View File

@ -70,6 +70,9 @@ public class PrefixTreeCell implements Cell, Comparable<Cell> {
protected int absoluteValueOffset;
protected int valueLength;
protected byte[] tagsBuffer;
protected int tagsOffset;
protected short tagsLength;
/********************** Cell methods ******************/
@ -217,17 +220,17 @@ public class PrefixTreeCell implements Cell, Comparable<Cell> {
@Override
public int getTagsOffset() {
throw new UnsupportedOperationException("Not implemented");
return tagsOffset;
}
@Override
public short getTagsLength() {
throw new UnsupportedOperationException("Not implemented");
return tagsLength;
}
@Override
public byte[] getTagsArray() {
throw new UnsupportedOperationException("Not implemented");
return this.tagsBuffer;
}
}
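
// With the three getters above now backed by the scanner's tags buffer, a consumer can lift
// the raw tag bytes out of a cell. A minimal editorial sketch; the copyTags helper is
// hypothetical and not part of this patch, and relies only on the accessors implemented above.
static byte[] copyTags(org.apache.hadoop.hbase.Cell cell) {
  byte[] copy = new byte[cell.getTagsLength()];
  System.arraycopy(cell.getTagsArray(), cell.getTagsOffset(), copy, 0, cell.getTagsLength());
  return copy;
}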

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.codec.prefixtree.decode.column;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
@ -30,9 +31,8 @@ public class ColumnNodeReader {
protected PrefixTreeBlockMeta blockMeta;
protected byte[] block;
protected ColumnNodeType nodeType;
protected byte[] columnBuffer;
protected boolean familyVsQualifier;
protected int offsetIntoBlock;
@ -43,9 +43,9 @@ public class ColumnNodeReader {
/************** construct *************************/
public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) {
public ColumnNodeReader(byte[] columnBuffer, ColumnNodeType nodeType) {
this.columnBuffer = columnBuffer;
this.familyVsQualifier = familyVsQualifier;
this.nodeType = nodeType;
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
@ -62,10 +62,12 @@ public class ColumnNodeReader {
tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength);
int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength;
int offsetWidth;
if (familyVsQualifier) {
if(nodeType == ColumnNodeType.FAMILY) {
offsetWidth = blockMeta.getFamilyOffsetWidth();
} else {
} else if(nodeType == ColumnNodeType.QUALIFIER) {
offsetWidth = blockMeta.getQualifierOffsetWidth();
} else {
offsetWidth = blockMeta.getTagsOffsetWidth();
}
parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth);
}
@ -75,10 +77,12 @@ public class ColumnNodeReader {
}
public boolean isRoot() {
if (familyVsQualifier) {
if (nodeType == ColumnNodeType.FAMILY) {
return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset();
} else {
} else if (nodeType == ColumnNodeType.QUALIFIER) {
return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset();
} else {
return offsetIntoBlock == blockMeta.getAbsoluteTagsOffset();
}
}

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.codec.prefixtree.decode.column;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
/**
* Position one of these appropriately in the data block and you can call its methods to retrieve
@ -35,17 +36,17 @@ public class ColumnReader {
protected byte[] columnBuffer;
protected int columnOffset;
protected int columnLength;
protected boolean familyVsQualifier;
protected ColumnNodeType nodeType;
protected ColumnNodeReader columnNodeReader;
/******************** construct *******************/
public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) {
public ColumnReader(byte[] columnBuffer, ColumnNodeType nodeType) {
this.columnBuffer = columnBuffer;
this.familyVsQualifier = familyVsQualifier;
this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier);
this.nodeType = nodeType;
this.columnNodeReader = new ColumnNodeReader(columnBuffer, nodeType);
}
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) {
@ -61,11 +62,13 @@ public class ColumnReader {
clearColumnBuffer();
int nextRelativeOffset = offsetIntoColumnData;
while (true) {
int absoluteOffset;
if (familyVsQualifier) {
int absoluteOffset = 0;
if (nodeType == ColumnNodeType.FAMILY) {
absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset;
} else {
} else if (nodeType == ColumnNodeType.QUALIFIER) {
absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset;
} else {
absoluteOffset = blockMeta.getAbsoluteTagsOffset() + nextRelativeOffset;
}
columnNodeReader.positionAt(absoluteOffset);
columnOffset -= columnNodeReader.getTokenLength();

View File

@ -20,8 +20,8 @@ package org.apache.hadoop.hbase.codec.prefixtree.decode.row;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.util.SimpleByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleByteRange;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.hbase.util.vint.UVIntTool;
@ -52,13 +52,14 @@ public class RowNodeReader {
protected int operationTypesOffset;
protected int valueOffsetsOffset;
protected int valueLengthsOffset;
protected int tagOffsetsOffset;
protected int nextNodeOffsetsOffset;
/******************* construct **************************/
public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) {
this.block = block;
this.block = block;
this.offset = offset;
resetFanIndex();
@ -73,8 +74,15 @@ public class RowNodeReader {
this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells);
this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth();
this.timestampIndexesOffset = qualifierOffsetsOffset + numCells
* blockMeta.getQualifierOffsetWidth();
this.tagOffsetsOffset = this.qualifierOffsetsOffset + numCells * blockMeta.getQualifierOffsetWidth();
// TODO: This code may not be needed now, as we always consider tags to be present
if(blockMeta.getTagsOffsetWidth() == 0) {
// Make both offsets the same so that we know there are no tags
this.tagOffsetsOffset = this.qualifierOffsetsOffset;
this.timestampIndexesOffset = qualifierOffsetsOffset + numCells * blockMeta.getQualifierOffsetWidth();
} else {
this.timestampIndexesOffset = tagOffsetsOffset + numCells * blockMeta.getTagsOffsetWidth();
}
this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells
* blockMeta.getTimestampIndexWidth();
this.operationTypesOffset = mvccVersionIndexesOffset + numCells
@ -134,6 +142,12 @@ public class RowNodeReader {
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
public int getTagOffset(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getTagsOffsetWidth();
int startIndex = tagOffsetsOffset + fIntWidth * index;
return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth);
}
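
// Editorial worked example (not part of the commit) to make the offset bookkeeping in
// initOnBlock concrete. Suppose numCells = 3, family offsets are 1 byte wide, qualifier
// offsets 2 bytes, tag offsets 1 byte, and the family-offset section starts at byte 10
// of the row node:
int familyOffsetsOffset = 10;
int qualifierOffsetsOffset = familyOffsetsOffset + 3 * 1;   // 13
int tagOffsetsOffset = qualifierOffsetsOffset + 3 * 2;      // 19
int timestampIndexesOffset = tagOffsetsOffset + 3 * 1;      // 22
// Had getTagsOffsetWidth() been 0, tagOffsetsOffset would collapse onto qualifierOffsetsOffset
// and the timestamp indexes would follow the qualifier offsets directly, as in the
// width == 0 branch above.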
public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) {
int fIntWidth = blockMeta.getTimestampIndexWidth();
int startIndex = timestampIndexesOffset + fIntWidth * index;

View File

@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.CellTypeEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder;
import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowSectionWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
@ -42,7 +43,6 @@ import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet;
import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet;
import org.apache.hadoop.hbase.util.vint.UFIntTool;
import org.apache.hadoop.io.WritableUtils;
/**
* This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are
* added they are completely copied into the various encoding structures. This is important because
@ -86,6 +86,7 @@ public class PrefixTreeEncoder implements CellOutputStream {
protected ByteRange rowRange;
protected ByteRange familyRange;
protected ByteRange qualifierRange;
protected ByteRange tagsRange;
/*
* incoming Cell fields are copied into these arrays
@ -94,7 +95,9 @@ public class PrefixTreeEncoder implements CellOutputStream {
protected long[] mvccVersions;
protected byte[] typeBytes;
protected int[] valueOffsets;
protected int[] tagsOffsets;
protected byte[] values;
protected byte[] tags;
protected PrefixTreeBlockMeta blockMeta;
@ -114,7 +117,7 @@ public class PrefixTreeEncoder implements CellOutputStream {
*/
protected ByteRangeSet familyDeduplicator;
protected ByteRangeSet qualifierDeduplicator;
protected ByteRangeSet tagsDeduplicator;
/*
* Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory
* trie structure with nodes connected by memory pointers (not serializable yet).
@ -122,6 +125,7 @@ public class PrefixTreeEncoder implements CellOutputStream {
protected Tokenizer rowTokenizer;
protected Tokenizer familyTokenizer;
protected Tokenizer qualifierTokenizer;
protected Tokenizer tagsTokenizer;
/*
* Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write
@ -130,6 +134,7 @@ public class PrefixTreeEncoder implements CellOutputStream {
protected RowSectionWriter rowWriter;
protected ColumnSectionWriter familyWriter;
protected ColumnSectionWriter qualifierWriter;
protected ColumnSectionWriter tagsWriter;
/*
* Integers used for counting cells and bytes. We keep track of the size of the Cells as if they
@ -138,7 +143,9 @@ public class PrefixTreeEncoder implements CellOutputStream {
protected int totalCells = 0;
protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues
protected int totalValueBytes = 0;
protected int totalTagBytes = 0;
protected int maxValueLength = 0;
protected int maxTagLength = 0;
protected int totalBytes = 0;//
@ -170,6 +177,7 @@ public class PrefixTreeEncoder implements CellOutputStream {
this.rowWriter = new RowSectionWriter();
this.familyWriter = new ColumnSectionWriter();
this.qualifierWriter = new ColumnSectionWriter();
initializeTagHelpers();
reset(outputStream, includeMvccVersion);
}
@ -179,9 +187,11 @@ public class PrefixTreeEncoder implements CellOutputStream {
this.includeMvccVersion = includeMvccVersion;
this.outputStream = outputStream;
valueOffsets[0] = 0;
familyDeduplicator.reset();
qualifierDeduplicator.reset();
tagsDeduplicator.reset();
tagsWriter.reset();
tagsTokenizer.reset();
rowTokenizer.reset();
timestampEncoder.reset();
mvccVersionEncoder.reset();
@ -199,6 +209,14 @@ public class PrefixTreeEncoder implements CellOutputStream {
totalBytes = 0;
}
protected void initializeTagHelpers() {
this.tagsRange = new SimpleByteRange();
this.tagsDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet()
: new ByteRangeTreeSet();
this.tagsTokenizer = new Tokenizer();
this.tagsWriter = new ColumnSectionWriter();
}
/**
* Check that the arrays used to hold cell fragments are large enough for the cell that is being
* added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the
@ -259,10 +277,16 @@ public class PrefixTreeEncoder implements CellOutputStream {
rowTokenizer.addSorted(CellUtil.fillRowRange(cell, rowRange));
addFamilyPart(cell);
addQualifierPart(cell);
addTagPart(cell);
addAfterRowFamilyQualifier(cell);
}
private void addTagPart(Cell cell) {
CellUtil.fillTagRange(cell, tagsRange);
tagsDeduplicator.add(tagsRange);
}
/***************** internal add methods ************************/
private void addAfterRowFamilyQualifier(Cell cell){
@ -333,6 +357,7 @@ public class PrefixTreeEncoder implements CellOutputStream {
rowWriter.writeBytes(outputStream);
familyWriter.writeBytes(outputStream);
qualifierWriter.writeBytes(outputStream);
tagsWriter.writeBytes(outputStream);
timestampEncoder.writeBytes(outputStream);
mvccVersionEncoder.writeBytes(outputStream);
//CellType bytes are in the row nodes. there is no additional type section
@ -349,12 +374,13 @@ public class PrefixTreeEncoder implements CellOutputStream {
blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset));
blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength));
blockMeta.setNumValueBytes(totalValueBytes);
totalBytes += totalValueBytes;
totalBytes += totalTagBytes + totalValueBytes;
//these compile methods will add to totalBytes
compileTypes();
compileMvccVersions();
compileTimestamps();
compileTags();
compileQualifiers();
compileFamilies();
compileRows();
@ -397,7 +423,7 @@ public class PrefixTreeEncoder implements CellOutputStream {
blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size());
qualifierDeduplicator.compile();
qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges());
qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false);
qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, ColumnNodeType.QUALIFIER);
qualifierWriter.compile();
int numQualifierBytes = qualifierWriter.getNumBytes();
blockMeta.setNumQualifierBytes(numQualifierBytes);
@ -408,13 +434,24 @@ public class PrefixTreeEncoder implements CellOutputStream {
blockMeta.setNumUniqueFamilies(familyDeduplicator.size());
familyDeduplicator.compile();
familyTokenizer.addAll(familyDeduplicator.getSortedRanges());
familyWriter.reconstruct(blockMeta, familyTokenizer, true);
familyWriter.reconstruct(blockMeta, familyTokenizer, ColumnNodeType.FAMILY);
familyWriter.compile();
int numFamilyBytes = familyWriter.getNumBytes();
blockMeta.setNumFamilyBytes(numFamilyBytes);
totalBytes += numFamilyBytes;
}
protected void compileTags() {
blockMeta.setNumUniqueTags(tagsDeduplicator.size());
tagsDeduplicator.compile();
tagsTokenizer.addAll(tagsDeduplicator.getSortedRanges());
tagsWriter.reconstruct(blockMeta, tagsTokenizer, ColumnNodeType.TAGS);
tagsWriter.compile();
int numTagBytes = tagsWriter.getNumBytes();
blockMeta.setNumTagsBytes(numTagBytes);
totalBytes += numTagBytes;
}
protected void compileRows() {
rowWriter.reconstruct(this);
rowWriter.compile();
@ -476,6 +513,10 @@ public class PrefixTreeEncoder implements CellOutputStream {
return qualifierDeduplicator;
}
public ByteRangeSet getTagSorter() {
return tagsDeduplicator;
}
public ColumnSectionWriter getFamilyWriter() {
return familyWriter;
}
@ -484,6 +525,10 @@ public class PrefixTreeEncoder implements CellOutputStream {
return qualifierWriter;
}
public ColumnSectionWriter getTagWriter() {
return tagsWriter;
}
public RowSectionWriter getRowWriter() {
return rowWriter;
}
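
// Rough end-to-end sketch of driving the tag-aware encoder, assuming the usual
// CellOutputStream contract of write-then-flush (an assumption; only the two-argument
// constructor, the add*Part methods and compileTags shown in this patch are taken as given).
// kvsWithTags is a hypothetical, sorted list of tag-bearing KeyValues.
// Assumed imports: java.io.ByteArrayOutputStream, java.util.List, org.apache.hadoop.hbase.KeyValue.
ByteArrayOutputStream os = new ByteArrayOutputStream(1 << 20);
PrefixTreeEncoder encoder = new PrefixTreeEncoder(os, false /* includeMvccVersion */);
for (KeyValue kv : kvsWithTags) {
  encoder.write(kv);   // eventually calls addFamilyPart/addQualifierPart/addTagPart as shown above
}
encoder.flush();       // flush is assumed to drive the compile/write path (compileTags, tagsWriter.writeBytes)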

View File

@ -23,6 +23,7 @@ import java.io.OutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
@ -48,20 +49,19 @@ public class ColumnNodeWriter{
protected TokenizerNode builderNode;
protected PrefixTreeBlockMeta blockMeta;
protected boolean familyVsQualifier;
protected int tokenLength;
protected byte[] token;
protected int parentStartPosition;
protected ColumnNodeType nodeType;
/*************** construct **************************/
public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode,
boolean familyVsQualifier) {
ColumnNodeType nodeType) {
this.blockMeta = blockMeta;
this.builderNode = builderNode;
this.familyVsQualifier = familyVsQualifier;
this.nodeType = nodeType;
calculateTokenLength();
}
@ -93,10 +93,12 @@ public class ColumnNodeWriter{
public void writeBytes(OutputStream os) throws IOException {
int parentOffsetWidth;
if (familyVsQualifier) {
if (this.nodeType == ColumnNodeType.FAMILY) {
parentOffsetWidth = blockMeta.getFamilyOffsetWidth();
} else {
} else if (this.nodeType == ColumnNodeType.QUALIFIER) {
parentOffsetWidth = blockMeta.getQualifierOffsetWidth();
} else {
parentOffsetWidth = blockMeta.getTagsOffsetWidth();
}
UVIntTool.writeBytes(tokenLength, os);
os.write(token);

View File

@ -25,6 +25,7 @@ import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.CollectionUtils;
@ -60,7 +61,7 @@ public class ColumnSectionWriter {
private PrefixTreeBlockMeta blockMeta;
private boolean familyVsQualifier;
private ColumnNodeType nodeType;
private Tokenizer tokenizer;
private int numBytes = 0;
private ArrayList<TokenizerNode> nonLeaves;
@ -79,16 +80,16 @@ public class ColumnSectionWriter {
}
public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
boolean familyVsQualifier) {
ColumnNodeType nodeType) {
this();// init collections
reconstruct(blockMeta, builder, familyVsQualifier);
reconstruct(blockMeta, builder, nodeType);
}
public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder,
boolean familyVsQualifier) {
ColumnNodeType nodeType) {
this.blockMeta = blockMeta;
this.tokenizer = builder;
this.familyVsQualifier = familyVsQualifier;
this.nodeType = nodeType;
}
public void reset() {
@ -102,14 +103,19 @@ public class ColumnSectionWriter {
/****************** methods *******************************/
public ColumnSectionWriter compile() {
if (familyVsQualifier) {
if (this.nodeType == ColumnNodeType.FAMILY) {
// do nothing. max family length fixed at Byte.MAX_VALUE
} else {
} else if (this.nodeType == ColumnNodeType.QUALIFIER) {
blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength());
} else {
blockMeta.setMaxTagsLength(tokenizer.getMaxElementLength());
}
compilerInternals();
return this;
}
protected void compilerInternals() {
tokenizer.setNodeFirstInsertionIndexes();
tokenizer.appendNodes(nonLeaves, true, false);
tokenizer.appendNodes(leaves, false, true);
@ -121,7 +127,7 @@ public class ColumnSectionWriter {
columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes));
for (int i = 0; i < allNodes.size(); ++i) {
TokenizerNode node = allNodes.get(i);
columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier));
columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType));
}
// leaf widths are known at this point, so add them up
@ -142,10 +148,12 @@ public class ColumnSectionWriter {
break;
}// it fits
}
if (familyVsQualifier) {
if (this.nodeType == ColumnNodeType.FAMILY) {
blockMeta.setFamilyOffsetWidth(parentOffsetWidth);
} else {
} else if (this.nodeType == ColumnNodeType.QUALIFIER) {
blockMeta.setQualifierOffsetWidth(parentOffsetWidth);
} else {
blockMeta.setTagsOffsetWidth(parentOffsetWidth);
}
int forwardIndex = 0;
@ -165,8 +173,6 @@ public class ColumnSectionWriter {
}
tokenizer.appendOutputArrayOffsets(outputArrayOffsets);
return this;
}
public void writeBytes(OutputStream os) throws IOException {

View File

@ -0,0 +1,28 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.encode.other;
import org.apache.hadoop.classification.InterfaceAudience;
/**
* Specifies the type of column node writer.
*/
@InterfaceAudience.Private
public enum ColumnNodeType {
FAMILY, QUALIFIER, TAGS;
}
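
// This enum replaces the familyVsQualifier boolean that the column readers and writers used to
// branch on; with three column sections (family, qualifier, tags) a two-way flag no longer
// suffices. A minimal editorial illustration of the dispatch it enables; the widthFor helper is
// hypothetical and simply mirrors the branching now done in ColumnNodeWriter and ColumnSectionWriter.
static int widthFor(ColumnNodeType nodeType, PrefixTreeBlockMeta blockMeta) {
  switch (nodeType) {
  case FAMILY:    return blockMeta.getFamilyOffsetWidth();
  case QUALIFIER: return blockMeta.getQualifierOffsetWidth();
  case TAGS:      return blockMeta.getTagsOffsetWidth();
  default:        throw new IllegalArgumentException("Unknown ColumnNodeType: " + nodeType);
  }
}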

View File

@ -105,6 +105,7 @@ public class RowNodeWriter{
if(tokenizerNode.hasOccurrences()){
int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth()
+ blockMeta.getQualifierOffsetWidth()
+ blockMeta.getTagsOffsetWidth()
+ blockMeta.getTimestampIndexWidth()
+ blockMeta.getMvccVersionIndexWidth()
+ blockMeta.getKeyValueTypeWidth()
@ -132,12 +133,12 @@ public class RowNodeWriter{
//UFInt indexes and offsets for each cell in the row (if nub or leaf)
writeFamilyNodeOffsets(os);
writeQualifierNodeOffsets(os);
writeTagNodeOffsets(os);
writeTimestampIndexes(os);
writeMvccVersionIndexes(os);
writeCellTypes(os);
writeValueOffsets(os);
writeValueLengths(os);
//offsets to the children of this row trie node (if branch or nub)
writeNextRowTrieNodeOffsets(os);
}
@ -220,6 +221,20 @@ public class RowNodeWriter{
}
}
protected void writeTagNodeOffsets(OutputStream os) throws IOException {
if (blockMeta.getTagsOffsetWidth() <= 0) {
return;
}
for (int i = 0; i < numCells; ++i) {
int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i;
int sortedIndex = prefixTreeEncoder.getTagSorter().getSortedIndexForInsertionId(
cellInsertionIndex);
int indexedTagOffset = prefixTreeEncoder.getTagWriter().getOutputArrayOffset(
sortedIndex);
UFIntTool.writeBytes(blockMeta.getTagsOffsetWidth(), indexedTagOffset, os);
}
}
protected void writeTimestampIndexes(OutputStream os) throws IOException {
if (blockMeta.getTimestampIndexWidth() <= 0) {
return;
@ -270,7 +285,6 @@ public class RowNodeWriter{
}
}
/**
* If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes.
*/

View File

@ -25,6 +25,8 @@ import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.codec.prefixtree.row.TestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
@ -47,9 +49,12 @@ public class TestKeyValueTool {
@Test
public void testRoundTripToBytes() {
if(rows instanceof TestRowDataTrivialWithTags || rows instanceof TestRowDataRandomKeyValuesWithTags) {
return;
}
List<KeyValue> kvs = rows.getInputs();
ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false);
List<KeyValue> roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false);
List<KeyValue> roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false, false);
Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray());
}
}

View File

@ -26,6 +26,7 @@ import java.util.List;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader;
import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter;
import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer;
import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode;
import org.apache.hadoop.hbase.util.ByteRange;
@ -92,12 +93,12 @@ public class TestColumnBuilder {
}
Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size());
writer = new ColumnSectionWriter(blockMeta, builder, false);
writer = new ColumnSectionWriter(blockMeta, builder, ColumnNodeType.QUALIFIER);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
writer.compile().writeBytes(baos);
bytes = baos.toByteArray();
buffer = new byte[blockMeta.getMaxQualifierLength()];
reader = new ColumnReader(buffer, false);
reader = new ColumnReader(buffer, ColumnNodeType.QUALIFIER);
reader.initOnBlock(blockMeta, bytes);
List<TokenizerNode> builderNodes = Lists.newArrayList();

View File

@ -32,10 +32,12 @@ import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNub;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSimple;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivial;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrls;
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
@ -65,6 +67,7 @@ public interface TestRowData {
//simple
all.add(new TestRowDataEmpty());
all.add(new TestRowDataTrivial());
all.add(new TestRowDataTrivialWithTags());
all.add(new TestRowDataSimple());
all.add(new TestRowDataDeeper());
@ -83,6 +86,7 @@ public interface TestRowData {
all.add(new TestRowDataUrlsExample());
all.add(new TestRowDataExerciseFInts());
all.add(new TestRowDataRandomKeyValues());
all.add(new TestRowDataRandomKeyValuesWithTags());
return all;
}

View File

@ -75,6 +75,7 @@ public class TestRowEncoder {
@Before
public void compile() throws IOException {
// Always run with tags, but we should also ensure that KVs without tags work fine;
// a tag-free variant is sketched after this method.
os = new ByteArrayOutputStream(1 << 20);
encoder = new PrefixTreeEncoder(os, includeMemstoreTS);
@ -92,7 +93,8 @@ public class TestRowEncoder {
blockMetaReader = new PrefixTreeBlockMeta(buffer);
searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(),
blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength());
blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength(),
blockMetaReader.getMaxTagsLength());
searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS);
}
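
// Editorial sketch of the tag-free case the comment above alludes to. RedundantKVGenerator is
// the generator used by the new random test data class later in this patch; the second argument
// is taken to toggle tag generation, as suggested by its use with true there (an assumption).
List<KeyValue> plainKvs = new RedundantKVGenerator().generateTestKeyValues(1 << 10, false);
// plainKvs can then be fed through the encoder and searcher exactly as above.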

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import com.google.common.collect.Lists;
/**
* Randomly generated KVs with tags.
*/
public class TestRowDataRandomKeyValuesWithTags extends BaseTestRowData {
static List<KeyValue> d = Lists.newArrayList();
static RedundantKVGenerator generator = new RedundantKVGenerator();
static {
d = generator.generateTestKeyValues(1 << 10, true);
}
@Override
public List<KeyValue> getInputs() {
return d;
}
}

View File

@ -0,0 +1,79 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.codec.prefixtree.row.data;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta;
import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition;
import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Assert;
import com.google.common.collect.Lists;
public class TestRowDataTrivialWithTags extends BaseTestRowData{
// turn "r" into a branch for the Searcher tests
static byte[] rA = Bytes.toBytes("rA"), rB = Bytes.toBytes("rB"),
    cf = Bytes.toBytes("fam"), cq0 = Bytes.toBytes("q0"), v0 = Bytes.toBytes("v0");
static long ts = 55L;
static List<KeyValue> d = Lists.newArrayList();
static {
List<Tag> tagList = new ArrayList<Tag>();
Tag t = new Tag((byte) 1, "visibility");
tagList.add(t);
t = new Tag((byte) 2, "ACL");
tagList.add(t);
d.add(new KeyValue(rA, cf, cq0, ts, v0, tagList));
d.add(new KeyValue(rB, cf, cq0, ts, v0, tagList));
}
@Override
public List<KeyValue> getInputs() {
return d;
}
@Override
public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) {
// node[0] -> root[r]
// node[1] -> leaf[A], etc
Assert.assertEquals(2, blockMeta.getRowTreeDepth());
}
@Override
public void individualSearcherAssertions(CellSearcher searcher) {
/**
* The searcher should get a token mismatch on the "r" branch. Assert that
* it skips not only rA, but rB as well.
*/
KeyValue afterLast = KeyValue.createFirstOnRow(Bytes.toBytes("zzz"));
CellScannerPosition position = searcher.positionAtOrAfter(afterLast);
Assert.assertEquals(CellScannerPosition.AFTER_LAST, position);
Assert.assertNull(searcher.current());
}
}

View File

@ -201,6 +201,16 @@ public final class CellProtos {
* <code>optional bytes value = 6;</code>
*/
com.google.protobuf.ByteString getValue();
// optional bytes tags = 7;
/**
* <code>optional bytes tags = 7;</code>
*/
boolean hasTags();
/**
* <code>optional bytes tags = 7;</code>
*/
com.google.protobuf.ByteString getTags();
}
/**
* Protobuf type {@code Cell}
@ -294,6 +304,11 @@ public final class CellProtos {
value_ = input.readBytes();
break;
}
case 58: {
bitField0_ |= 0x00000040;
tags_ = input.readBytes();
break;
}
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@ -430,6 +445,22 @@ public final class CellProtos {
return value_;
}
// optional bytes tags = 7;
public static final int TAGS_FIELD_NUMBER = 7;
private com.google.protobuf.ByteString tags_;
/**
* <code>optional bytes tags = 7;</code>
*/
public boolean hasTags() {
return ((bitField0_ & 0x00000040) == 0x00000040);
}
/**
* <code>optional bytes tags = 7;</code>
*/
public com.google.protobuf.ByteString getTags() {
return tags_;
}
private void initFields() {
row_ = com.google.protobuf.ByteString.EMPTY;
family_ = com.google.protobuf.ByteString.EMPTY;
@ -437,6 +468,7 @@ public final class CellProtos {
timestamp_ = 0L;
cellType_ = org.apache.hadoop.hbase.protobuf.generated.CellProtos.CellType.MINIMUM;
value_ = com.google.protobuf.ByteString.EMPTY;
tags_ = com.google.protobuf.ByteString.EMPTY;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@ -468,6 +500,9 @@ public final class CellProtos {
if (((bitField0_ & 0x00000020) == 0x00000020)) {
output.writeBytes(6, value_);
}
if (((bitField0_ & 0x00000040) == 0x00000040)) {
output.writeBytes(7, tags_);
}
getUnknownFields().writeTo(output);
}
@ -501,6 +536,10 @@ public final class CellProtos {
size += com.google.protobuf.CodedOutputStream
.computeBytesSize(6, value_);
}
if (((bitField0_ & 0x00000040) == 0x00000040)) {
size += com.google.protobuf.CodedOutputStream
.computeBytesSize(7, tags_);
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@ -554,6 +593,11 @@ public final class CellProtos {
result = result && getValue()
.equals(other.getValue());
}
result = result && (hasTags() == other.hasTags());
if (hasTags()) {
result = result && getTags()
.equals(other.getTags());
}
result = result &&
getUnknownFields().equals(other.getUnknownFields());
return result;
@ -591,6 +635,10 @@ public final class CellProtos {
hash = (37 * hash) + VALUE_FIELD_NUMBER;
hash = (53 * hash) + getValue().hashCode();
}
if (hasTags()) {
hash = (37 * hash) + TAGS_FIELD_NUMBER;
hash = (53 * hash) + getTags().hashCode();
}
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
@ -717,6 +765,8 @@ public final class CellProtos {
bitField0_ = (bitField0_ & ~0x00000010);
value_ = com.google.protobuf.ByteString.EMPTY;
bitField0_ = (bitField0_ & ~0x00000020);
tags_ = com.google.protobuf.ByteString.EMPTY;
bitField0_ = (bitField0_ & ~0x00000040);
return this;
}
@ -769,6 +819,10 @@ public final class CellProtos {
to_bitField0_ |= 0x00000020;
}
result.value_ = value_;
if (((from_bitField0_ & 0x00000040) == 0x00000040)) {
to_bitField0_ |= 0x00000040;
}
result.tags_ = tags_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@ -803,6 +857,9 @@ public final class CellProtos {
if (other.hasValue()) {
setValue(other.getValue());
}
if (other.hasTags()) {
setTags(other.getTags());
}
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@ -1043,6 +1100,42 @@ public final class CellProtos {
return this;
}
// optional bytes tags = 7;
private com.google.protobuf.ByteString tags_ = com.google.protobuf.ByteString.EMPTY;
/**
* <code>optional bytes tags = 7;</code>
*/
public boolean hasTags() {
return ((bitField0_ & 0x00000040) == 0x00000040);
}
/**
* <code>optional bytes tags = 7;</code>
*/
public com.google.protobuf.ByteString getTags() {
return tags_;
}
/**
* <code>optional bytes tags = 7;</code>
*/
public Builder setTags(com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
bitField0_ |= 0x00000040;
tags_ = value;
onChanged();
return this;
}
/**
* <code>optional bytes tags = 7;</code>
*/
public Builder clearTags() {
bitField0_ = (bitField0_ & ~0x00000040);
tags_ = getDefaultInstance().getTags();
onChanged();
return this;
}
// @@protoc_insertion_point(builder_scope:Cell)
}
@ -1116,6 +1209,16 @@ public final class CellProtos {
* <code>optional bytes value = 6;</code>
*/
com.google.protobuf.ByteString getValue();
// optional bytes tags = 7;
/**
* <code>optional bytes tags = 7;</code>
*/
boolean hasTags();
/**
* <code>optional bytes tags = 7;</code>
*/
com.google.protobuf.ByteString getTags();
}
/**
* Protobuf type {@code KeyValue}
@ -1210,6 +1313,11 @@ public final class CellProtos {
value_ = input.readBytes();
break;
}
case 58: {
bitField0_ |= 0x00000040;
tags_ = input.readBytes();
break;
}
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@ -1346,6 +1454,22 @@ public final class CellProtos {
return value_;
}
// optional bytes tags = 7;
public static final int TAGS_FIELD_NUMBER = 7;
private com.google.protobuf.ByteString tags_;
/**
* <code>optional bytes tags = 7;</code>
*/
public boolean hasTags() {
return ((bitField0_ & 0x00000040) == 0x00000040);
}
/**
* <code>optional bytes tags = 7;</code>
*/
public com.google.protobuf.ByteString getTags() {
return tags_;
}
private void initFields() {
row_ = com.google.protobuf.ByteString.EMPTY;
family_ = com.google.protobuf.ByteString.EMPTY;
@ -1353,6 +1477,7 @@ public final class CellProtos {
timestamp_ = 0L;
keyType_ = org.apache.hadoop.hbase.protobuf.generated.CellProtos.CellType.MINIMUM;
value_ = com.google.protobuf.ByteString.EMPTY;
tags_ = com.google.protobuf.ByteString.EMPTY;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@ -1396,6 +1521,9 @@ public final class CellProtos {
if (((bitField0_ & 0x00000020) == 0x00000020)) {
output.writeBytes(6, value_);
}
if (((bitField0_ & 0x00000040) == 0x00000040)) {
output.writeBytes(7, tags_);
}
getUnknownFields().writeTo(output);
}
@ -1429,6 +1557,10 @@ public final class CellProtos {
size += com.google.protobuf.CodedOutputStream
.computeBytesSize(6, value_);
}
if (((bitField0_ & 0x00000040) == 0x00000040)) {
size += com.google.protobuf.CodedOutputStream
.computeBytesSize(7, tags_);
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@ -1482,6 +1614,11 @@ public final class CellProtos {
result = result && getValue()
.equals(other.getValue());
}
result = result && (hasTags() == other.hasTags());
if (hasTags()) {
result = result && getTags()
.equals(other.getTags());
}
result = result &&
getUnknownFields().equals(other.getUnknownFields());
return result;
@ -1519,6 +1656,10 @@ public final class CellProtos {
hash = (37 * hash) + VALUE_FIELD_NUMBER;
hash = (53 * hash) + getValue().hashCode();
}
if (hasTags()) {
hash = (37 * hash) + TAGS_FIELD_NUMBER;
hash = (53 * hash) + getTags().hashCode();
}
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
@ -1646,6 +1787,8 @@ public final class CellProtos {
bitField0_ = (bitField0_ & ~0x00000010);
value_ = com.google.protobuf.ByteString.EMPTY;
bitField0_ = (bitField0_ & ~0x00000020);
tags_ = com.google.protobuf.ByteString.EMPTY;
bitField0_ = (bitField0_ & ~0x00000040);
return this;
}
@ -1698,6 +1841,10 @@ public final class CellProtos {
to_bitField0_ |= 0x00000020;
}
result.value_ = value_;
if (((from_bitField0_ & 0x00000040) == 0x00000040)) {
to_bitField0_ |= 0x00000040;
}
result.tags_ = tags_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@ -1732,6 +1879,9 @@ public final class CellProtos {
if (other.hasValue()) {
setValue(other.getValue());
}
if (other.hasTags()) {
setTags(other.getTags());
}
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@ -1984,6 +2134,42 @@ public final class CellProtos {
return this;
}
// optional bytes tags = 7;
private com.google.protobuf.ByteString tags_ = com.google.protobuf.ByteString.EMPTY;
/**
* <code>optional bytes tags = 7;</code>
*/
public boolean hasTags() {
return ((bitField0_ & 0x00000040) == 0x00000040);
}
/**
* <code>optional bytes tags = 7;</code>
*/
public com.google.protobuf.ByteString getTags() {
return tags_;
}
/**
* <code>optional bytes tags = 7;</code>
*/
public Builder setTags(com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
bitField0_ |= 0x00000040;
tags_ = value;
onChanged();
return this;
}
/**
* <code>optional bytes tags = 7;</code>
*/
public Builder clearTags() {
bitField0_ = (bitField0_ & ~0x00000040);
tags_ = getDefaultInstance().getTags();
onChanged();
return this;
}
// @@protoc_insertion_point(builder_scope:KeyValue)
}
@ -2014,17 +2200,18 @@ public final class CellProtos {
descriptor;
static {
java.lang.String[] descriptorData = {
"\n\nCell.proto\"v\n\004Cell\022\013\n\003row\030\001 \001(\014\022\016\n\006fam" +
"ily\030\002 \001(\014\022\021\n\tqualifier\030\003 \001(\014\022\021\n\ttimestam" +
"p\030\004 \001(\004\022\034\n\tcell_type\030\005 \001(\0162\t.CellType\022\r\n" +
"\005value\030\006 \001(\014\"y\n\010KeyValue\022\013\n\003row\030\001 \002(\014\022\016\n" +
"\006family\030\002 \002(\014\022\021\n\tqualifier\030\003 \002(\014\022\021\n\ttime" +
"stamp\030\004 \001(\004\022\033\n\010key_type\030\005 \001(\0162\t.CellType" +
"\022\r\n\005value\030\006 \001(\014*`\n\010CellType\022\013\n\007MINIMUM\020\000" +
"\022\007\n\003PUT\020\004\022\n\n\006DELETE\020\010\022\021\n\rDELETE_COLUMN\020\014" +
"\022\021\n\rDELETE_FAMILY\020\016\022\014\n\007MAXIMUM\020\377\001B=\n*org" +
".apache.hadoop.hbase.protobuf.generatedB",
"\nCellProtosH\001\240\001\001"
"\n\nCell.proto\"\204\001\n\004Cell\022\013\n\003row\030\001 \001(\014\022\016\n\006fa" +
"mily\030\002 \001(\014\022\021\n\tqualifier\030\003 \001(\014\022\021\n\ttimesta" +
"mp\030\004 \001(\004\022\034\n\tcell_type\030\005 \001(\0162\t.CellType\022\r" +
"\n\005value\030\006 \001(\014\022\014\n\004tags\030\007 \001(\014\"\207\001\n\010KeyValue" +
"\022\013\n\003row\030\001 \002(\014\022\016\n\006family\030\002 \002(\014\022\021\n\tqualifi" +
"er\030\003 \002(\014\022\021\n\ttimestamp\030\004 \001(\004\022\033\n\010key_type\030" +
"\005 \001(\0162\t.CellType\022\r\n\005value\030\006 \001(\014\022\014\n\004tags\030" +
"\007 \001(\014*`\n\010CellType\022\013\n\007MINIMUM\020\000\022\007\n\003PUT\020\004\022" +
"\n\n\006DELETE\020\010\022\021\n\rDELETE_COLUMN\020\014\022\021\n\rDELETE" +
"_FAMILY\020\016\022\014\n\007MAXIMUM\020\377\001B=\n*org.apache.ha",
"doop.hbase.protobuf.generatedB\nCellProto" +
"sH\001\240\001\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@ -2036,13 +2223,13 @@ public final class CellProtos {
internal_static_Cell_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_Cell_descriptor,
new java.lang.String[] { "Row", "Family", "Qualifier", "Timestamp", "CellType", "Value", });
new java.lang.String[] { "Row", "Family", "Qualifier", "Timestamp", "CellType", "Value", "Tags", });
internal_static_KeyValue_descriptor =
getDescriptor().getMessageTypes().get(1);
internal_static_KeyValue_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_KeyValue_descriptor,
new java.lang.String[] { "Row", "Family", "Qualifier", "Timestamp", "KeyType", "Value", });
new java.lang.String[] { "Row", "Family", "Qualifier", "Timestamp", "KeyType", "Value", "Tags", });
return null;
}
};

View File

@ -9288,6 +9288,16 @@ public final class ClientProtos {
* <code>optional .MutationProto.DeleteType delete_type = 4;</code>
*/
org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType getDeleteType();
// optional bytes tags = 5;
/**
* <code>optional bytes tags = 5;</code>
*/
boolean hasTags();
/**
* <code>optional bytes tags = 5;</code>
*/
com.google.protobuf.ByteString getTags();
}
/**
* Protobuf type {@code MutationProto.ColumnValue.QualifierValue}
@ -9366,6 +9376,11 @@ public final class ClientProtos {
}
break;
}
case 42: {
bitField0_ |= 0x00000010;
tags_ = input.readBytes();
break;
}
}
}
} catch (com.google.protobuf.InvalidProtocolBufferException e) {
@ -9470,11 +9485,28 @@ public final class ClientProtos {
return deleteType_;
}
// optional bytes tags = 5;
public static final int TAGS_FIELD_NUMBER = 5;
private com.google.protobuf.ByteString tags_;
/**
* <code>optional bytes tags = 5;</code>
*/
public boolean hasTags() {
return ((bitField0_ & 0x00000010) == 0x00000010);
}
/**
* <code>optional bytes tags = 5;</code>
*/
public com.google.protobuf.ByteString getTags() {
return tags_;
}
private void initFields() {
qualifier_ = com.google.protobuf.ByteString.EMPTY;
value_ = com.google.protobuf.ByteString.EMPTY;
timestamp_ = 0L;
deleteType_ = org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType.DELETE_ONE_VERSION;
tags_ = com.google.protobuf.ByteString.EMPTY;
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@ -9500,6 +9532,9 @@ public final class ClientProtos {
if (((bitField0_ & 0x00000008) == 0x00000008)) {
output.writeEnum(4, deleteType_.getNumber());
}
if (((bitField0_ & 0x00000010) == 0x00000010)) {
output.writeBytes(5, tags_);
}
getUnknownFields().writeTo(output);
}
@ -9525,6 +9560,10 @@ public final class ClientProtos {
size += com.google.protobuf.CodedOutputStream
.computeEnumSize(4, deleteType_.getNumber());
}
if (((bitField0_ & 0x00000010) == 0x00000010)) {
size += com.google.protobuf.CodedOutputStream
.computeBytesSize(5, tags_);
}
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@ -9568,6 +9607,11 @@ public final class ClientProtos {
result = result &&
(getDeleteType() == other.getDeleteType());
}
result = result && (hasTags() == other.hasTags());
if (hasTags()) {
result = result && getTags()
.equals(other.getTags());
}
result = result &&
getUnknownFields().equals(other.getUnknownFields());
return result;
@ -9597,6 +9641,10 @@ public final class ClientProtos {
hash = (37 * hash) + DELETE_TYPE_FIELD_NUMBER;
hash = (53 * hash) + hashEnum(getDeleteType());
}
if (hasTags()) {
hash = (37 * hash) + TAGS_FIELD_NUMBER;
hash = (53 * hash) + getTags().hashCode();
}
hash = (29 * hash) + getUnknownFields().hashCode();
memoizedHashCode = hash;
return hash;
@ -9714,6 +9762,8 @@ public final class ClientProtos {
bitField0_ = (bitField0_ & ~0x00000004);
deleteType_ = org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType.DELETE_ONE_VERSION;
bitField0_ = (bitField0_ & ~0x00000008);
tags_ = com.google.protobuf.ByteString.EMPTY;
bitField0_ = (bitField0_ & ~0x00000010);
return this;
}
@ -9758,6 +9808,10 @@ public final class ClientProtos {
to_bitField0_ |= 0x00000008;
}
result.deleteType_ = deleteType_;
if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
to_bitField0_ |= 0x00000010;
}
result.tags_ = tags_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@ -9786,6 +9840,9 @@ public final class ClientProtos {
if (other.hasDeleteType()) {
setDeleteType(other.getDeleteType());
}
if (other.hasTags()) {
setTags(other.getTags());
}
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@ -9954,6 +10011,42 @@ public final class ClientProtos {
return this;
}
// optional bytes tags = 5;
private com.google.protobuf.ByteString tags_ = com.google.protobuf.ByteString.EMPTY;
/**
* <code>optional bytes tags = 5;</code>
*/
public boolean hasTags() {
return ((bitField0_ & 0x00000010) == 0x00000010);
}
/**
* <code>optional bytes tags = 5;</code>
*/
public com.google.protobuf.ByteString getTags() {
return tags_;
}
/**
* <code>optional bytes tags = 5;</code>
*/
public Builder setTags(com.google.protobuf.ByteString value) {
if (value == null) {
throw new NullPointerException();
}
bitField0_ |= 0x00000010;
tags_ = value;
onChanged();
return this;
}
/**
* <code>optional bytes tags = 5;</code>
*/
public Builder clearTags() {
bitField0_ = (bitField0_ & ~0x00000010);
tags_ = getDefaultInstance().getTags();
onChanged();
return this;
}
// @@protoc_insertion_point(builder_scope:MutationProto.ColumnValue.QualifierValue)
}
@ -27723,7 +27816,7 @@ public final class ClientProtos {
"exists\030\002 \003(\010\"\200\001\n\tCondition\022\013\n\003row\030\001 \002(\014\022",
"\016\n\006family\030\002 \002(\014\022\021\n\tqualifier\030\003 \002(\014\022\"\n\014co" +
"mpare_type\030\004 \002(\0162\014.CompareType\022\037\n\ncompar" +
"ator\030\005 \002(\0132\013.Comparator\"\227\006\n\rMutationProt" +
"ator\030\005 \002(\0132\013.Comparator\"\246\006\n\rMutationProt" +
"o\022\013\n\003row\030\001 \001(\014\0220\n\013mutate_type\030\002 \001(\0162\033.Mu" +
"tationProto.MutationType\0220\n\014column_value" +
"\030\003 \003(\0132\032.MutationProto.ColumnValue\022\021\n\tti" +
@ -27731,70 +27824,71 @@ public final class ClientProtos {
"ytesPair\022:\n\ndurability\030\006 \001(\0162\031.MutationP" +
"roto.Durability:\013USE_DEFAULT\022\036\n\ntime_ran" +
"ge\030\007 \001(\0132\n.TimeRange\022\035\n\025associated_cell_",
"count\030\010 \001(\005\032\330\001\n\013ColumnValue\022\016\n\006family\030\001 " +
"count\030\010 \001(\005\032\347\001\n\013ColumnValue\022\016\n\006family\030\001 " +
"\002(\014\022B\n\017qualifier_value\030\002 \003(\0132).MutationP" +
"roto.ColumnValue.QualifierValue\032u\n\016Quali" +
"fierValue\022\021\n\tqualifier\030\001 \001(\014\022\r\n\005value\030\002 " +
"\001(\014\022\021\n\ttimestamp\030\003 \001(\004\022.\n\013delete_type\030\004 " +
"\001(\0162\031.MutationProto.DeleteType\"W\n\nDurabi" +
"lity\022\017\n\013USE_DEFAULT\020\000\022\014\n\010SKIP_WAL\020\001\022\r\n\tA" +
"SYNC_WAL\020\002\022\014\n\010SYNC_WAL\020\003\022\r\n\tFSYNC_WAL\020\004\"" +
">\n\014MutationType\022\n\n\006APPEND\020\000\022\r\n\tINCREMENT" +
"\020\001\022\007\n\003PUT\020\002\022\n\n\006DELETE\020\003\"p\n\nDeleteType\022\026\n",
"\022DELETE_ONE_VERSION\020\000\022\034\n\030DELETE_MULTIPLE" +
"_VERSIONS\020\001\022\021\n\rDELETE_FAMILY\020\002\022\031\n\025DELETE" +
"_FAMILY_VERSION\020\003\"r\n\rMutateRequest\022 \n\006re" +
"gion\030\001 \002(\0132\020.RegionSpecifier\022 \n\010mutation" +
"\030\002 \002(\0132\016.MutationProto\022\035\n\tcondition\030\003 \001(" +
"\0132\n.Condition\"<\n\016MutateResponse\022\027\n\006resul" +
"t\030\001 \001(\0132\007.Result\022\021\n\tprocessed\030\002 \001(\010\"\344\002\n\004" +
"Scan\022\027\n\006column\030\001 \003(\0132\007.Column\022!\n\tattribu" +
"te\030\002 \003(\0132\016.NameBytesPair\022\021\n\tstart_row\030\003 " +
"\001(\014\022\020\n\010stop_row\030\004 \001(\014\022\027\n\006filter\030\005 \001(\0132\007.",
"Filter\022\036\n\ntime_range\030\006 \001(\0132\n.TimeRange\022\027" +
"\n\014max_versions\030\007 \001(\r:\0011\022\032\n\014cache_blocks\030" +
"\010 \001(\010:\004true\022\022\n\nbatch_size\030\t \001(\r\022\027\n\017max_r" +
"esult_size\030\n \001(\004\022\023\n\013store_limit\030\013 \001(\r\022\024\n" +
"\014store_offset\030\014 \001(\r\022&\n\036load_column_famil" +
"ies_on_demand\030\r \001(\010\022\r\n\005small\030\016 \001(\010\"\236\001\n\013S" +
"canRequest\022 \n\006region\030\001 \001(\0132\020.RegionSpeci" +
"fier\022\023\n\004scan\030\002 \001(\0132\005.Scan\022\022\n\nscanner_id\030" +
"\003 \001(\004\022\026\n\016number_of_rows\030\004 \001(\r\022\025\n\rclose_s" +
"canner\030\005 \001(\010\022\025\n\rnext_call_seq\030\006 \001(\004\"y\n\014S",
"canResponse\022\030\n\020cells_per_result\030\001 \003(\r\022\022\n" +
"\nscanner_id\030\002 \001(\004\022\024\n\014more_results\030\003 \001(\010\022" +
"\013\n\003ttl\030\004 \001(\r\022\030\n\007results\030\005 \003(\0132\007.Result\"\263" +
"\001\n\024BulkLoadHFileRequest\022 \n\006region\030\001 \002(\0132" +
"\020.RegionSpecifier\0225\n\013family_path\030\002 \003(\0132 " +
".BulkLoadHFileRequest.FamilyPath\022\026\n\016assi" +
"gn_seq_num\030\003 \001(\010\032*\n\nFamilyPath\022\016\n\006family" +
"\030\001 \002(\014\022\014\n\004path\030\002 \002(\t\"\'\n\025BulkLoadHFileRes" +
"ponse\022\016\n\006loaded\030\001 \002(\010\"a\n\026CoprocessorServ" +
"iceCall\022\013\n\003row\030\001 \002(\014\022\024\n\014service_name\030\002 \002",
"(\t\022\023\n\013method_name\030\003 \002(\t\022\017\n\007request\030\004 \002(\014" +
"\"d\n\031CoprocessorServiceRequest\022 \n\006region\030" +
"\001 \002(\0132\020.RegionSpecifier\022%\n\004call\030\002 \002(\0132\027." +
"CoprocessorServiceCall\"]\n\032CoprocessorSer" +
"viceResponse\022 \n\006region\030\001 \002(\0132\020.RegionSpe" +
"cifier\022\035\n\005value\030\002 \002(\0132\016.NameBytesPair\"B\n" +
"\013MultiAction\022 \n\010mutation\030\001 \001(\0132\016.Mutatio" +
"nProto\022\021\n\003get\030\002 \001(\0132\004.Get\"I\n\014ActionResul" +
"t\022\026\n\005value\030\001 \001(\0132\007.Result\022!\n\texception\030\002" +
" \001(\0132\016.NameBytesPair\"^\n\014MultiRequest\022 \n\006",
"region\030\001 \002(\0132\020.RegionSpecifier\022\034\n\006action" +
"\030\002 \003(\0132\014.MultiAction\022\016\n\006atomic\030\003 \001(\010\".\n\r" +
"MultiResponse\022\035\n\006result\030\001 \003(\0132\r.ActionRe" +
"sult2\342\002\n\rClientService\022 \n\003Get\022\013.GetReque" +
"st\032\014.GetResponse\022/\n\010MultiGet\022\020.MultiGetR" +
"equest\032\021.MultiGetResponse\022)\n\006Mutate\022\016.Mu" +
"tateRequest\032\017.MutateResponse\022#\n\004Scan\022\014.S" +
"canRequest\032\r.ScanResponse\022>\n\rBulkLoadHFi" +
"le\022\025.BulkLoadHFileRequest\032\026.BulkLoadHFil" +
"eResponse\022F\n\013ExecService\022\032.CoprocessorSe",
"rviceRequest\032\033.CoprocessorServiceRespons" +
"e\022&\n\005Multi\022\r.MultiRequest\032\016.MultiRespons" +
"eBB\n*org.apache.hadoop.hbase.protobuf.ge" +
"neratedB\014ClientProtosH\001\210\001\001\240\001\001"
"roto.ColumnValue.QualifierValue\032\203\001\n\016Qual" +
"ifierValue\022\021\n\tqualifier\030\001 \001(\014\022\r\n\005value\030\002" +
" \001(\014\022\021\n\ttimestamp\030\003 \001(\004\022.\n\013delete_type\030\004" +
" \001(\0162\031.MutationProto.DeleteType\022\014\n\004tags\030" +
"\005 \001(\014\"W\n\nDurability\022\017\n\013USE_DEFAULT\020\000\022\014\n\010" +
"SKIP_WAL\020\001\022\r\n\tASYNC_WAL\020\002\022\014\n\010SYNC_WAL\020\003\022" +
"\r\n\tFSYNC_WAL\020\004\">\n\014MutationType\022\n\n\006APPEND" +
"\020\000\022\r\n\tINCREMENT\020\001\022\007\n\003PUT\020\002\022\n\n\006DELETE\020\003\"p",
"\n\nDeleteType\022\026\n\022DELETE_ONE_VERSION\020\000\022\034\n\030" +
"DELETE_MULTIPLE_VERSIONS\020\001\022\021\n\rDELETE_FAM" +
"ILY\020\002\022\031\n\025DELETE_FAMILY_VERSION\020\003\"r\n\rMuta" +
"teRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpecif" +
"ier\022 \n\010mutation\030\002 \002(\0132\016.MutationProto\022\035\n" +
"\tcondition\030\003 \001(\0132\n.Condition\"<\n\016MutateRe" +
"sponse\022\027\n\006result\030\001 \001(\0132\007.Result\022\021\n\tproce" +
"ssed\030\002 \001(\010\"\344\002\n\004Scan\022\027\n\006column\030\001 \003(\0132\007.Co" +
"lumn\022!\n\tattribute\030\002 \003(\0132\016.NameBytesPair\022" +
"\021\n\tstart_row\030\003 \001(\014\022\020\n\010stop_row\030\004 \001(\014\022\027\n\006",
"filter\030\005 \001(\0132\007.Filter\022\036\n\ntime_range\030\006 \001(" +
"\0132\n.TimeRange\022\027\n\014max_versions\030\007 \001(\r:\0011\022\032" +
"\n\014cache_blocks\030\010 \001(\010:\004true\022\022\n\nbatch_size" +
"\030\t \001(\r\022\027\n\017max_result_size\030\n \001(\004\022\023\n\013store" +
"_limit\030\013 \001(\r\022\024\n\014store_offset\030\014 \001(\r\022&\n\036lo" +
"ad_column_families_on_demand\030\r \001(\010\022\r\n\005sm" +
"all\030\016 \001(\010\"\236\001\n\013ScanRequest\022 \n\006region\030\001 \001(" +
"\0132\020.RegionSpecifier\022\023\n\004scan\030\002 \001(\0132\005.Scan" +
"\022\022\n\nscanner_id\030\003 \001(\004\022\026\n\016number_of_rows\030\004" +
" \001(\r\022\025\n\rclose_scanner\030\005 \001(\010\022\025\n\rnext_call",
"_seq\030\006 \001(\004\"y\n\014ScanResponse\022\030\n\020cells_per_" +
"result\030\001 \003(\r\022\022\n\nscanner_id\030\002 \001(\004\022\024\n\014more" +
"_results\030\003 \001(\010\022\013\n\003ttl\030\004 \001(\r\022\030\n\007results\030\005" +
" \003(\0132\007.Result\"\263\001\n\024BulkLoadHFileRequest\022 " +
"\n\006region\030\001 \002(\0132\020.RegionSpecifier\0225\n\013fami" +
"ly_path\030\002 \003(\0132 .BulkLoadHFileRequest.Fam" +
"ilyPath\022\026\n\016assign_seq_num\030\003 \001(\010\032*\n\nFamil" +
"yPath\022\016\n\006family\030\001 \002(\014\022\014\n\004path\030\002 \002(\t\"\'\n\025B" +
"ulkLoadHFileResponse\022\016\n\006loaded\030\001 \002(\010\"a\n\026" +
"CoprocessorServiceCall\022\013\n\003row\030\001 \002(\014\022\024\n\014s",
"ervice_name\030\002 \002(\t\022\023\n\013method_name\030\003 \002(\t\022\017" +
"\n\007request\030\004 \002(\014\"d\n\031CoprocessorServiceReq" +
"uest\022 \n\006region\030\001 \002(\0132\020.RegionSpecifier\022%" +
"\n\004call\030\002 \002(\0132\027.CoprocessorServiceCall\"]\n" +
"\032CoprocessorServiceResponse\022 \n\006region\030\001 " +
"\002(\0132\020.RegionSpecifier\022\035\n\005value\030\002 \002(\0132\016.N" +
"ameBytesPair\"B\n\013MultiAction\022 \n\010mutation\030" +
"\001 \001(\0132\016.MutationProto\022\021\n\003get\030\002 \001(\0132\004.Get" +
"\"I\n\014ActionResult\022\026\n\005value\030\001 \001(\0132\007.Result" +
"\022!\n\texception\030\002 \001(\0132\016.NameBytesPair\"^\n\014M",
"ultiRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpec" +
"ifier\022\034\n\006action\030\002 \003(\0132\014.MultiAction\022\016\n\006a" +
"tomic\030\003 \001(\010\".\n\rMultiResponse\022\035\n\006result\030\001" +
" \003(\0132\r.ActionResult2\342\002\n\rClientService\022 \n" +
"\003Get\022\013.GetRequest\032\014.GetResponse\022/\n\010Multi" +
"Get\022\020.MultiGetRequest\032\021.MultiGetResponse" +
"\022)\n\006Mutate\022\016.MutateRequest\032\017.MutateRespo" +
"nse\022#\n\004Scan\022\014.ScanRequest\032\r.ScanResponse" +
"\022>\n\rBulkLoadHFile\022\025.BulkLoadHFileRequest" +
"\032\026.BulkLoadHFileResponse\022F\n\013ExecService\022",
"\032.CoprocessorServiceRequest\032\033.Coprocesso" +
"rServiceResponse\022&\n\005Multi\022\r.MultiRequest" +
"\032\016.MultiResponseBB\n*org.apache.hadoop.hb" +
"ase.protobuf.generatedB\014ClientProtosH\001\210\001" +
"\001\240\001\001"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@ -27866,7 +27960,7 @@ public final class ClientProtos {
internal_static_MutationProto_ColumnValue_QualifierValue_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_MutationProto_ColumnValue_QualifierValue_descriptor,
new java.lang.String[] { "Qualifier", "Value", "Timestamp", "DeleteType", });
new java.lang.String[] { "Qualifier", "Value", "Timestamp", "DeleteType", "Tags", });
internal_static_MutateRequest_descriptor =
getDescriptor().getMessageTypes().get(9);
internal_static_MutateRequest_fieldAccessorTable = new

View File

@ -48,6 +48,7 @@ message Cell {
optional uint64 timestamp = 4;
optional CellType cell_type = 5;
optional bytes value = 6;
optional bytes tags = 7;
}
/**
@ -61,4 +62,5 @@ message KeyValue {
optional uint64 timestamp = 4;
optional CellType key_type = 5;
optional bytes value = 6;
optional bytes tags = 7;
}

View File

@ -187,6 +187,7 @@ message MutationProto {
optional bytes value = 2;
optional uint64 timestamp = 3;
optional DeleteType delete_type = 4;
optional bytes tags = 5;
}
}
}
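Like the new tags field on the Cell and KeyValue messages above, QualifierValue now carries a cell's tags as a single opaque bytes field. A minimal sketch of populating it through the regenerated builder (the setter names follow from the generated accessor table shown earlier; how the tag bytes themselves are serialized is outside this sketch):

import com.google.protobuf.ByteString;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.ColumnValue.QualifierValue;

public class QualifierValueTagSketch {
  // serializedTags is assumed to already be in whatever on-wire tag format the server expects.
  static QualifierValue withTags(byte[] qualifier, byte[] value, long ts, byte[] serializedTags) {
    return QualifierValue.newBuilder()
        .setQualifier(ByteString.copyFrom(qualifier))
        .setValue(ByteString.copyFrom(value))
        .setTimestamp(ts)
        .setTags(ByteString.copyFrom(serializedTags))  // new optional field 5
        .build();
  }
}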

View File

@ -329,4 +329,6 @@ public abstract class AbstractHFileReader implements HFile.Reader {
public DataBlockEncoding getEncodingOnDisk() {
return dataBlockEncoder.getEncodingOnDisk();
}
public abstract int getMajorVersion();
}

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
@ -61,9 +62,6 @@ public abstract class AbstractHFileWriter implements HFile.Writer {
/** A "file info" block: a key-value map of file-wide metadata. */
protected FileInfo fileInfo = new HFile.FileInfo();
/** Number of uncompressed bytes we allow per block. */
protected final int blockSize;
/** Total # of key/value entries, i.e. how many times add() was called. */
protected long entryCount = 0;
@ -85,15 +83,6 @@ public abstract class AbstractHFileWriter implements HFile.Writer {
/** {@link Writable}s representing meta block data. */
protected List<Writable> metaData = new ArrayList<Writable>();
/** The compression algorithm used. NONE if no compression. */
protected final Compression.Algorithm compressAlgo;
/**
* The data block encoding which will be used.
* {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding.
*/
protected final HFileDataBlockEncoder blockEncoder;
/** First key in a block. */
protected byte[] firstKeyInBlock = null;
@ -110,19 +99,28 @@ public abstract class AbstractHFileWriter implements HFile.Writer {
*/
protected final String name;
/**
* The data block encoding which will be used.
* {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding.
*/
protected final HFileDataBlockEncoder blockEncoder;
protected final HFileContext hFileContext;
public AbstractHFileWriter(CacheConfig cacheConf,
FSDataOutputStream outputStream, Path path, int blockSize,
Compression.Algorithm compressAlgo,
HFileDataBlockEncoder dataBlockEncoder,
KVComparator comparator) {
FSDataOutputStream outputStream, Path path,
KVComparator comparator, HFileContext fileContext) {
this.outputStream = outputStream;
this.path = path;
this.name = path != null ? path.getName() : outputStream.toString();
this.blockSize = blockSize;
this.compressAlgo = compressAlgo == null
? HFile.DEFAULT_COMPRESSION_ALGORITHM : compressAlgo;
this.blockEncoder = dataBlockEncoder != null
? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
this.hFileContext = fileContext;
if (hFileContext.getEncodingOnDisk() != DataBlockEncoding.NONE
|| hFileContext.getEncodingInCache() != DataBlockEncoding.NONE) {
this.blockEncoder = new HFileDataBlockEncoderImpl(hFileContext.getEncodingOnDisk(),
hFileContext.getEncodingInCache());
} else {
this.blockEncoder = NoOpDataBlockEncoder.INSTANCE;
}
this.comparator = comparator != null ? comparator
: KeyValue.COMPARATOR;
@ -234,7 +232,7 @@ public abstract class AbstractHFileWriter implements HFile.Writer {
@Override
public String toString() {
return "writer=" + (path != null ? path.toString() : null) + ", name="
+ name + ", compression=" + compressAlgo.getName();
+ name + ", compression=" + hFileContext.getCompression().getName();
}
/**
@ -245,7 +243,7 @@ public abstract class AbstractHFileWriter implements HFile.Writer {
trailer.setMetaIndexCount(metaNames.size());
trailer.setTotalUncompressedBytes(totalUncompressedBytes+ trailer.getTrailerSize());
trailer.setEntryCount(entryCount);
trailer.setCompressionCodec(compressAlgo);
trailer.setCompressionCodec(hFileContext.getCompression());
trailer.serialize(outputStream);

View File

@ -23,10 +23,7 @@ import java.nio.ByteBuffer;
import java.util.zip.Checksum;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumFactory;
import org.apache.hadoop.hbase.util.ChecksumType;
/**
@ -107,7 +104,7 @@ public class ChecksumUtil {
// when the minorVersion is 0, thus this is a defensive check for a
// cannot-happen case. Since this is a cannot-happen case, it is
// better to return false to indicate a checksum validation failure.
if (block.getMinorVersion() < HFileBlock.MINOR_VERSION_WITH_CHECKSUM) {
if (!block.getHFileContext().shouldUseHBaseChecksum()) {
return false;
}

View File

@ -54,11 +54,6 @@ import com.google.common.io.NullOutputStream;
@InterfaceAudience.Private
public class FixedFileTrailer {
private static final Log LOG = LogFactory.getLog(FixedFileTrailer.class);
/** HFile minor version that introduced pbuf filetrailer */
private static final int PBUF_TRAILER_MINOR_VERSION = 2;
/**
* We store the comparator class name as a fixed-length field in the trailer.
*/
@ -131,18 +126,13 @@ public class FixedFileTrailer {
private static int[] computeTrailerSizeByVersion() {
int versionToSize[] = new int[HFile.MAX_FORMAT_VERSION + 1];
for (int version = HFile.MIN_FORMAT_VERSION;
version <= HFile.MAX_FORMAT_VERSION;
++version) {
FixedFileTrailer fft = new FixedFileTrailer(version, HFileBlock.MINOR_VERSION_NO_CHECKSUM);
DataOutputStream dos = new DataOutputStream(new NullOutputStream());
try {
fft.serialize(dos);
} catch (IOException ex) {
// The above has no reason to fail.
throw new RuntimeException(ex);
}
versionToSize[version] = dos.size();
// We support only 2 major versions now, i.e. V2 and V3
versionToSize[2] = 212;
for (int version = 3; version <= HFile.MAX_FORMAT_VERSION; version++) {
// Max FFT size for V3 and above is taken as 1KB to leave room for future
// enhancements, if any.
// This holds as long as the trailer size does not exceed 1024 bytes.
versionToSize[version] = 1024;
}
return versionToSize;
}
@ -184,11 +174,7 @@ public class FixedFileTrailer {
DataOutputStream baosDos = new DataOutputStream(baos);
BlockType.TRAILER.write(baosDos);
if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) {
serializeAsPB(baosDos);
} else {
serializeAsWritable(baosDos);
}
serializeAsPB(baosDos);
// The last 4 bytes of the file encode the major and minor version universally
baosDos.writeInt(materializeVersion(majorVersion, minorVersion));
@ -233,29 +219,6 @@ public class FixedFileTrailer {
}
}
/**
* Write trailer data as writable
* @param outputStream
* @throws IOException
*/
void serializeAsWritable(DataOutputStream output) throws IOException {
output.writeLong(fileInfoOffset);
output.writeLong(loadOnOpenDataOffset);
output.writeInt(dataIndexCount);
output.writeLong(uncompressedDataIndexSize);
output.writeInt(metaIndexCount);
output.writeLong(totalUncompressedBytes);
output.writeLong(entryCount);
output.writeInt(compressionCodec.ordinal());
output.writeInt(numDataIndexLevels);
output.writeLong(firstDataBlockOffset);
output.writeLong(lastDataBlockOffset);
Bytes.writeStringFixedSize(output, comparatorClassName, MAX_COMPARATOR_NAME_LENGTH);
}
/**
* Deserialize the fixed file trailer from the given stream. The version needs
* to already be specified. Make sure this is consistent with
@ -269,7 +232,8 @@ public class FixedFileTrailer {
BlockType.TRAILER.readAndCheck(inputStream);
if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) {
if (majorVersion > 2
|| (majorVersion == 2 && minorVersion >= HFileReaderV2.PBUF_TRAILER_MINOR_VERSION)) {
deserializeFromPB(inputStream);
} else {
deserializeFromWritable(inputStream);
@ -655,7 +619,7 @@ public class FixedFileTrailer {
* Create a 4 byte serialized version number by combining the
* minor and major version numbers.
*/
private static int materializeVersion(int majorVersion, int minorVersion) {
static int materializeVersion(int majorVersion, int minorVersion) {
return ((majorVersion & 0x00ffffff) | (minorVersion << 24));
}
}
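For reference, materializeVersion() above keeps the major version in the low 24 bits of the trailing int and the minor version in the high 8 bits. A minimal round-trip sketch (the unpacking helpers are illustrations only, not methods from this patch):

public class HFileVersionSketch {
  // Mirrors materializeVersion(): major version in the low 24 bits, minor in the high 8 bits.
  static int pack(int majorVersion, int minorVersion) {
    return (majorVersion & 0x00ffffff) | (minorVersion << 24);
  }
  static int extractMajor(int packed) {
    return packed & 0x00ffffff;
  }
  static int extractMinor(int packed) {
    return packed >>> 24;
  }
  public static void main(String[] args) {
    int packed = pack(3, 0);  // an HFile v3 trailer
    System.out.println(extractMajor(packed) + "." + extractMinor(packed));  // prints 3.0
  }
}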

View File

@ -50,23 +50,21 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import com.google.common.base.Preconditions;
@ -156,7 +154,7 @@ public class HFile {
/** Maximum supported HFile format version
*/
public static final int MAX_FORMAT_VERSION = 2;
public static final int MAX_FORMAT_VERSION = 3;
/** Default compression name: none. */
public final static String DEFAULT_COMPRESSION =
@ -292,6 +290,8 @@ public class HFile {
void append(byte[] key, byte[] value) throws IOException;
void append (byte[] key, byte[] value, byte[] tag) throws IOException;
/** @return the path to this {@link HFile} */
Path getPath();
@ -332,15 +332,9 @@ public class HFile {
protected FileSystem fs;
protected Path path;
protected FSDataOutputStream ostream;
protected int blockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE;
protected Compression.Algorithm compression =
HFile.DEFAULT_COMPRESSION_ALGORITHM;
protected HFileDataBlockEncoder encoder = NoOpDataBlockEncoder.INSTANCE;
protected KVComparator comparator = KeyValue.COMPARATOR;
protected InetSocketAddress[] favoredNodes;
protected ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
protected int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM;
protected boolean includeMVCCReadpoint = true;
private HFileContext fileContext;
WriterFactory(Configuration conf, CacheConfig cacheConf) {
this.conf = conf;
@ -361,29 +355,6 @@ public class HFile {
return this;
}
public WriterFactory withBlockSize(int blockSize) {
this.blockSize = blockSize;
return this;
}
public WriterFactory withCompression(Compression.Algorithm compression) {
Preconditions.checkNotNull(compression);
this.compression = compression;
return this;
}
public WriterFactory withCompression(String compressAlgo) {
Preconditions.checkNotNull(compression);
this.compression = AbstractHFileWriter.compressionByName(compressAlgo);
return this;
}
public WriterFactory withDataBlockEncoder(HFileDataBlockEncoder encoder) {
Preconditions.checkNotNull(encoder);
this.encoder = encoder;
return this;
}
public WriterFactory withComparator(KVComparator comparator) {
Preconditions.checkNotNull(comparator);
this.comparator = comparator;
@ -396,23 +367,8 @@ public class HFile {
return this;
}
public WriterFactory withChecksumType(ChecksumType checksumType) {
Preconditions.checkNotNull(checksumType);
this.checksumType = checksumType;
return this;
}
public WriterFactory withBytesPerChecksum(int bytesPerChecksum) {
this.bytesPerChecksum = bytesPerChecksum;
return this;
}
/**
* @param includeMVCCReadpoint whether to write the mvcc readpoint to the file for each KV
* @return this (for chained invocation)
*/
public WriterFactory includeMVCCReadpoint(boolean includeMVCCReadpoint) {
this.includeMVCCReadpoint = includeMVCCReadpoint;
public WriterFactory withFileContext(HFileContext fileContext) {
this.fileContext = fileContext;
return this;
}
@ -424,16 +380,12 @@ public class HFile {
if (path != null) {
ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes);
}
return createWriter(fs, path, ostream, blockSize,
compression, encoder, comparator, checksumType, bytesPerChecksum, includeMVCCReadpoint);
return createWriter(fs, path, ostream,
comparator, fileContext);
}
protected abstract Writer createWriter(FileSystem fs, Path path,
FSDataOutputStream ostream, int blockSize,
Compression.Algorithm compress,
HFileDataBlockEncoder dataBlockEncoder,
KVComparator comparator, ChecksumType checksumType,
int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException;
protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
KVComparator comparator, HFileContext fileContext) throws IOException;
}
/** The configuration key for HFile version to use for new files */
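With the builder trimmed down as above, everything that used to be a separate with-method now travels in one HFileContext. A hedged usage sketch: withComparator, withFileContext and create appear in the hunk above, while HFile.getWriterFactory, CacheConfig and withPath are assumed entry points, the HFileContext package is assumed, and its setters are the ones used later in this patch.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;

public class WriterFactorySketch {
  static HFile.Writer newWriter(Configuration conf, FileSystem fs, Path path) throws IOException {
    HFileContext context = new HFileContext();
    context.setCompressAlgo(Compression.Algorithm.GZ);  // was withCompression(...)
    context.setBytesPerChecksum(16 * 1024);             // was withBytesPerChecksum(...)
    context.setIncludesMvcc(true);                      // was includeMVCCReadpoint(...)
    context.setIncludesTags(true);                      // new: let v3 blocks carry tags
    return HFile.getWriterFactory(conf, new CacheConfig(conf))  // assumed factory lookup
        .withPath(fs, path)                                     // assumed path builder
        .withComparator(KeyValue.COMPARATOR)
        .withFileContext(context)
        .create();
  }
}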
@ -466,6 +418,8 @@ public class HFile {
switch (version) {
case 2:
return new HFileWriterV2.WriterFactoryV2(conf, cacheConf);
case 3:
return new HFileWriterV3.WriterFactoryV3(conf, cacheConf);
default:
throw new IllegalArgumentException("Cannot create writer for HFile " +
"format version " + version);
@ -573,6 +527,9 @@ public class HFile {
case 2:
return new HFileReaderV2(
path, trailer, fsdis, size, cacheConf, preferredEncodingInCache, hfs);
case 3 :
return new HFileReaderV3(
path, trailer, fsdis, size, cacheConf, preferredEncodingInCache, hfs);
default:
throw new CorruptHFileException("Invalid HFile version " + trailer.getMajorVersion());
}
@ -589,7 +546,6 @@ public class HFile {
public static Reader createReaderWithEncoding(
FileSystem fs, Path path, CacheConfig cacheConf,
DataBlockEncoding preferredEncodingInCache) throws IOException {
final boolean closeIStream = true;
FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path);
return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(),
cacheConf, preferredEncodingInCache, stream.getHfs());
@ -648,15 +604,16 @@ public class HFile {
}
/**
* Metadata for this file. Conjured by the writer. Read in by the reader.
*/
static class FileInfo implements SortedMap<byte [], byte []> {
public static class FileInfo implements SortedMap<byte[], byte[]> {
static final String RESERVED_PREFIX = "hfile.";
static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX);
static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY");
static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN");
static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN");
static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR");
public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN");
private final SortedMap<byte [], byte []> map = new TreeMap<byte [], byte []>(Bytes.BYTES_COMPARATOR);
public FileInfo() {

View File

@ -17,7 +17,6 @@
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
@ -35,6 +34,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@ -43,7 +43,6 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.ClassSize;
@ -86,12 +85,6 @@ import com.google.common.base.Preconditions;
@InterfaceAudience.Private
public class HFileBlock implements Cacheable {
/** Minor versions starting with this number have hbase checksums */
static final int MINOR_VERSION_WITH_CHECKSUM = 1;
/** minor version that does not support checksums */
static final int MINOR_VERSION_NO_CHECKSUM = 0;
/**
* On a checksum failure on a Reader, this many succeeding read
* requests switch back to using hdfs checksums before auto-reenabling
@ -115,8 +108,8 @@ public class HFileBlock implements Cacheable {
public static final int BYTE_BUFFER_HEAP_SIZE = (int) ClassSize.estimateBase(
ByteBuffer.wrap(new byte[0], 0, 0).getClass(), false);
// minorVersion+offset+nextBlockOnDiskSizeWithHeader
public static final int EXTRA_SERIALIZATION_SPACE = 2 * Bytes.SIZEOF_INT
// meta.usesHBaseChecksum+offset+nextBlockOnDiskSizeWithHeader
public static final int EXTRA_SERIALIZATION_SPACE = Bytes.SIZEOF_BYTE + Bytes.SIZEOF_INT
+ Bytes.SIZEOF_LONG;
/**
@ -137,8 +130,8 @@ public class HFileBlock implements Cacheable {
}
buf.position(buf.limit());
buf.limit(buf.limit() + HFileBlock.EXTRA_SERIALIZATION_SPACE);
int minorVersion=buf.getInt();
HFileBlock ourBuffer = new HFileBlock(newByteBuffer, minorVersion);
boolean usesChecksum = buf.get() == (byte)1;
HFileBlock ourBuffer = new HFileBlock(newByteBuffer, usesChecksum);
ourBuffer.offset = buf.getLong();
ourBuffer.nextBlockOnDiskSizeWithHeader = buf.getInt();
return ourBuffer;
@ -171,23 +164,13 @@ public class HFileBlock implements Cacheable {
/** The offset of the previous block on disk */
private final long prevBlockOffset;
/** The Type of checksum, better to store the byte than an object */
private final byte checksumType;
/** The number of bytes for which a checksum is computed */
private final int bytesPerChecksum;
/** Size on disk of header and data. Does not include checksum data */
private final int onDiskDataSizeWithHeader;
/** The minor version of the hfile. */
private final int minorVersion;
/** The in-memory representation of the hfile block */
private ByteBuffer buf;
/** Whether there is a memstore timestamp after every key/value */
private boolean includesMemstoreTS;
/** Meta data that holds meta information on the hfileblock**/
private HFileContext fileContext;
/**
* The offset of this block in the file. Populated by the reader for
@ -220,17 +203,16 @@ public class HFileBlock implements Cacheable {
* @param fillHeader true to fill in the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of
* the buffer based on the header fields provided
* @param offset the file offset the block was read from
* @param minorVersion the minor version of this block
* @param bytesPerChecksum the number of bytes per checksum chunk
* @param checksumType the checksum algorithm to use
* @param onDiskDataSizeWithHeader size of header and data on disk not
* including checksum data
* @param fileContext HFile meta data
*/
HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader,
int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer buf,
boolean fillHeader, long offset, boolean includesMemstoreTS,
int minorVersion, int bytesPerChecksum, byte checksumType,
int onDiskDataSizeWithHeader) {
boolean fillHeader, long offset,
int onDiskDataSizeWithHeader, HFileContext fileContext) {
this.blockType = blockType;
this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader;
this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader;
@ -239,11 +221,8 @@ public class HFileBlock implements Cacheable {
if (fillHeader)
overwriteHeader();
this.offset = offset;
this.includesMemstoreTS = includesMemstoreTS;
this.minorVersion = minorVersion;
this.bytesPerChecksum = bytesPerChecksum;
this.checksumType = checksumType;
this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader;
this.fileContext = fileContext;
}
/**
@ -254,20 +233,21 @@ public class HFileBlock implements Cacheable {
* because majorNumbers indicate the format of a HFile whereas minorNumbers
* indicate the format inside a HFileBlock.
*/
HFileBlock(ByteBuffer b, int minorVersion) throws IOException {
HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException {
b.rewind();
blockType = BlockType.read(b);
onDiskSizeWithoutHeader = b.getInt();
uncompressedSizeWithoutHeader = b.getInt();
prevBlockOffset = b.getLong();
this.minorVersion = minorVersion;
if (minorVersion >= MINOR_VERSION_WITH_CHECKSUM) {
this.checksumType = b.get();
this.bytesPerChecksum = b.getInt();
this.fileContext = new HFileContext();
this.fileContext.setUsesHBaseChecksum(usesHBaseChecksum);
if (usesHBaseChecksum) {
this.fileContext.setChecksumType(ChecksumType.codeToType(b.get()));
this.fileContext.setBytesPerChecksum(b.getInt());
this.onDiskDataSizeWithHeader = b.getInt();
} else {
this.checksumType = ChecksumType.NULL.getCode();
this.bytesPerChecksum = 0;
this.fileContext.setChecksumType(ChecksumType.NULL);
this.fileContext.setBytesPerChecksum(0);
this.onDiskDataSizeWithHeader = onDiskSizeWithoutHeader +
HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
}
@ -417,9 +397,9 @@ public class HFileBlock implements Cacheable {
"uncompressedSizeWithoutHeader");
sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlocKOffset");
if (minorVersion >= MINOR_VERSION_WITH_CHECKSUM) {
sanityCheckAssertion(buf.get(), checksumType, "checksumType");
sanityCheckAssertion(buf.getInt(), bytesPerChecksum, "bytesPerChecksum");
if (this.fileContext.shouldUseHBaseChecksum()) {
sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType");
sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), "bytesPerChecksum");
sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader,
"onDiskDataSizeWithHeader");
}
@ -540,17 +520,15 @@ public class HFileBlock implements Cacheable {
public long heapSize() {
long size = ClassSize.align(
ClassSize.OBJECT +
// Block type and byte buffer references
2 * ClassSize.REFERENCE +
// Block type, byte buffer and meta references
3 * ClassSize.REFERENCE +
// On-disk size, uncompressed size, and next block's on-disk size
// bytePerChecksum, onDiskDataSize and minorVersion
6 * Bytes.SIZEOF_INT +
// Checksum type
1 * Bytes.SIZEOF_BYTE +
// bytePerChecksum and onDiskDataSize
4 * Bytes.SIZEOF_INT +
// This and previous block offset
2 * Bytes.SIZEOF_LONG +
// "Include memstore timestamp" flag
Bytes.SIZEOF_BOOLEAN
// Heap size of the meta object. meta will be always not null.
fileContext.heapSize()
);
if (buf != null) {
@ -698,35 +676,24 @@ public class HFileBlock implements Cacheable {
/** The offset of the previous block of the same type */
private long prevOffset;
/** Whether we are including memstore timestamp after every key/value */
private boolean includesMemstoreTS;
/** Checksum settings */
private ChecksumType checksumType;
private int bytesPerChecksum;
/** Meta data that holds information about the hfileblock**/
private HFileContext fileContext;
/**
* @param compressionAlgorithm compression algorithm to use
* @param dataBlockEncoder data block encoding algorithm to use
* @param checksumType type of checksum
* @param bytesPerChecksum bytes per checksum
*/
public Writer(Compression.Algorithm compressionAlgorithm,
HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS,
ChecksumType checksumType, int bytesPerChecksum) {
public Writer(HFileDataBlockEncoder dataBlockEncoder, HFileContext fileContext) {
this.dataBlockEncoder = dataBlockEncoder != null
? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
defaultBlockEncodingCtx =
new HFileBlockDefaultEncodingContext(compressionAlgorithm, null, HConstants.HFILEBLOCK_DUMMY_HEADER);
dataBlockEncodingCtx =
this.dataBlockEncoder.newOnDiskDataBlockEncodingContext(
compressionAlgorithm, HConstants.HFILEBLOCK_DUMMY_HEADER);
defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null,
HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
dataBlockEncodingCtx = this.dataBlockEncoder
.newOnDiskDataBlockEncodingContext(HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext);
if (bytesPerChecksum < HConstants.HFILEBLOCK_HEADER_SIZE) {
if (fileContext.getBytesPerChecksum() < HConstants.HFILEBLOCK_HEADER_SIZE) {
throw new RuntimeException("Unsupported value of bytesPerChecksum. " +
" Minimum is " + HConstants.HFILEBLOCK_HEADER_SIZE + " but the configured value is " +
bytesPerChecksum);
fileContext.getBytesPerChecksum());
}
baosInMemory = new ByteArrayOutputStream();
@ -735,9 +702,7 @@ public class HFileBlock implements Cacheable {
for (int i = 0; i < prevOffsetByType.length; ++i)
prevOffsetByType[i] = -1;
this.includesMemstoreTS = includesMemstoreTS;
this.checksumType = checksumType;
this.bytesPerChecksum = bytesPerChecksum;
this.fileContext = fileContext;
}
/**
@ -821,7 +786,7 @@ public class HFileBlock implements Cacheable {
int numBytes = (int) ChecksumUtil.numBytes(
onDiskBytesWithHeader.length,
bytesPerChecksum);
fileContext.getBytesPerChecksum());
// put the header for on disk bytes
putHeader(onDiskBytesWithHeader, 0,
@ -835,7 +800,7 @@ public class HFileBlock implements Cacheable {
onDiskChecksum = new byte[numBytes];
ChecksumUtil.generateChecksums(
onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length,
onDiskChecksum, 0, checksumType, bytesPerChecksum);
onDiskChecksum, 0, fileContext.getChecksumType(), fileContext.getBytesPerChecksum());
}
/**
@ -848,9 +813,8 @@ public class HFileBlock implements Cacheable {
ByteBuffer.wrap(uncompressedBytesWithHeader, HConstants.HFILEBLOCK_HEADER_SIZE,
uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE).slice();
//do the encoding
dataBlockEncoder.beforeWriteToDisk(rawKeyValues,
includesMemstoreTS, dataBlockEncodingCtx, blockType);
// do the encoding
dataBlockEncoder.beforeWriteToDisk(rawKeyValues, dataBlockEncodingCtx, blockType);
uncompressedBytesWithHeader =
dataBlockEncodingCtx.getUncompressedBytesWithHeader();
@ -873,8 +837,8 @@ public class HFileBlock implements Cacheable {
offset = Bytes.putInt(dest, offset, onDiskSize - HConstants.HFILEBLOCK_HEADER_SIZE);
offset = Bytes.putInt(dest, offset, uncompressedSize - HConstants.HFILEBLOCK_HEADER_SIZE);
offset = Bytes.putLong(dest, offset, prevOffset);
offset = Bytes.putByte(dest, offset, checksumType.getCode());
offset = Bytes.putInt(dest, offset, bytesPerChecksum);
offset = Bytes.putByte(dest, offset, fileContext.getChecksumType().getCode());
offset = Bytes.putInt(dest, offset, fileContext.getBytesPerChecksum());
Bytes.putInt(dest, offset, onDiskDataSize);
}
@ -1055,12 +1019,13 @@ public class HFileBlock implements Cacheable {
* 0 value in bytesPerChecksum.
*/
public HFileBlock getBlockForCaching() {
HFileContext newContext = fileContext.clone();
newContext.setBytesPerChecksum(0);
newContext.setChecksumType(ChecksumType.NULL); // no checksums in cached data
return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
getUncompressedSizeWithoutHeader(), prevOffset,
getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset,
includesMemstoreTS, MINOR_VERSION_WITH_CHECKSUM,
0, ChecksumType.NULL.getCode(), // no checksums in cached data
onDiskBytesWithHeader.length + onDiskChecksum.length);
getUncompressedSizeWithoutHeader(), prevOffset, getUncompressedBufferWithHeader(),
DONT_FILL_HEADER, startOffset,
onDiskBytesWithHeader.length + onDiskChecksum.length, newContext);
}
}
@ -1134,14 +1099,10 @@ public class HFileBlock implements Cacheable {
*/
private abstract static class AbstractFSReader implements FSReader {
/** Compression algorithm used by the {@link HFile} */
protected Compression.Algorithm compressAlgo;
/** The size of the file we are reading from, or -1 if unknown. */
protected long fileSize;
/** The minor version of this reader */
private int minorVersion;
/** The size of the header */
protected final int hdrSize;
@ -1156,14 +1117,15 @@ public class HFileBlock implements Cacheable {
/** The default buffer size for our buffered streams */
public static final int DEFAULT_BUFFER_SIZE = 1 << 20;
public AbstractFSReader(Algorithm compressAlgo, long fileSize, int minorVersion,
HFileSystem hfs, Path path) throws IOException {
this.compressAlgo = compressAlgo;
protected HFileContext fileContext;
public AbstractFSReader(long fileSize, HFileSystem hfs, Path path, HFileContext fileContext)
throws IOException {
this.fileSize = fileSize;
this.minorVersion = minorVersion;
this.hfs = hfs;
this.path = path;
this.hdrSize = headerSize(minorVersion);
this.fileContext = fileContext;
this.hdrSize = headerSize(fileContext.shouldUseHBaseChecksum());
}
@Override
@ -1266,12 +1228,6 @@ public class HFileBlock implements Cacheable {
hdrSize;
}
/**
* @return The minorVersion of this HFile
*/
protected int getMinorVersion() {
return minorVersion;
}
}
/**
@ -1290,9 +1246,6 @@ public class HFileBlock implements Cacheable {
* does or doesn't do checksum validations in the filesystem */
protected FSDataInputStreamWrapper streamWrapper;
/** Whether we include memstore timestamp in data blocks */
protected boolean includesMemstoreTS;
/** Data block encoding used to read from file */
protected HFileDataBlockEncoder dataBlockEncoder =
NoOpDataBlockEncoder.INSTANCE;
@ -1309,28 +1262,24 @@ public class HFileBlock implements Cacheable {
}
};
public FSReaderV2(FSDataInputStreamWrapper stream, Algorithm compressAlgo, long fileSize,
int minorVersion, HFileSystem hfs, Path path) throws IOException {
super(compressAlgo, fileSize, minorVersion, hfs, path);
public FSReaderV2(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path,
HFileContext fileContext) throws IOException {
super(fileSize, hfs, path, fileContext);
this.streamWrapper = stream;
// Older versions of HBase didn't support checksum.
boolean forceNoHBaseChecksum = (this.getMinorVersion() < MINOR_VERSION_WITH_CHECKSUM);
this.streamWrapper.prepareForBlockReader(forceNoHBaseChecksum);
this.streamWrapper.prepareForBlockReader(!fileContext.shouldUseHBaseChecksum());
defaultDecodingCtx =
new HFileBlockDefaultDecodingContext(compressAlgo);
new HFileBlockDefaultDecodingContext(fileContext);
encodedBlockDecodingCtx =
new HFileBlockDefaultDecodingContext(compressAlgo);
new HFileBlockDefaultDecodingContext(fileContext);
}
/**
* A constructor that reads files with the latest minor version.
* This is used by unit tests only.
*/
FSReaderV2(FSDataInputStream istream, Algorithm compressAlgo,
long fileSize) throws IOException {
this(new FSDataInputStreamWrapper(istream), compressAlgo, fileSize,
HFileReaderV2.MAX_MINOR_VERSION, null, null);
FSReaderV2(FSDataInputStream istream, long fileSize, HFileContext fileContext) throws IOException {
this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext);
}
/**
@ -1490,7 +1439,7 @@ public class HFileBlock implements Cacheable {
// from memory if using compression. Here we have already read the
// block's header
try {
b = new HFileBlock(headerBuf, getMinorVersion());
b = new HFileBlock(headerBuf, this.fileContext.shouldUseHBaseChecksum());
} catch (IOException ex) {
// Seen in load testing. Provide comprehensive debug info.
throw new IOException("Failed to read compressed block at "
@ -1528,8 +1477,7 @@ public class HFileBlock implements Cacheable {
readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(),
hdrSize, false, offset, pread);
}
b = new HFileBlock(headerBuf, getMinorVersion());
b = new HFileBlock(headerBuf, this.fileContext.shouldUseHBaseChecksum());
onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize];
System.arraycopy(headerBuf.array(),
headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize);
@ -1538,7 +1486,7 @@ public class HFileBlock implements Cacheable {
- hdrSize, true, offset + hdrSize, pread);
onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize;
}
Algorithm compressAlgo = fileContext.getCompression();
boolean isCompressed =
compressAlgo != null
&& compressAlgo != Compression.Algorithm.NONE;
@ -1576,7 +1524,7 @@ public class HFileBlock implements Cacheable {
// contains the header of next block, so no need to set next
// block's header in it.
b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0,
onDiskSizeWithHeader), getMinorVersion());
onDiskSizeWithHeader), this.fileContext.shouldUseHBaseChecksum());
}
b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize;
@ -1588,19 +1536,19 @@ public class HFileBlock implements Cacheable {
prefetchedHeader.header, 0, hdrSize);
}
b.includesMemstoreTS = includesMemstoreTS;
b.offset = offset;
b.fileContext.setIncludesTags(this.fileContext.shouldIncludeTags());
b.fileContext.setIncludesMvcc(this.fileContext.shouldIncludeMvcc());
return b;
}
void setIncludesMemstoreTS(boolean enabled) {
includesMemstoreTS = enabled;
void setIncludesMemstoreTS(boolean includesMemstoreTS) {
this.fileContext.setIncludesMvcc(includesMemstoreTS);
}
void setDataBlockEncoder(HFileDataBlockEncoder encoder) {
this.dataBlockEncoder = encoder;
encodedBlockDecodingCtx = encoder.newOnDiskDataBlockDecodingContext(
this.compressAlgo);
encodedBlockDecodingCtx = encoder.newOnDiskDataBlockDecodingContext(this.fileContext);
}
/**
@ -1634,14 +1582,11 @@ public class HFileBlock implements Cacheable {
ByteBuffer dupBuf = this.buf.duplicate();
dupBuf.rewind();
destination.put(dupBuf);
destination.putInt(this.minorVersion);
destination.putLong(this.offset);
destination.putInt(this.nextBlockOnDiskSizeWithHeader);
destination.rewind();
serializeExtraInfo(destination);
}
public void serializeExtraInfo(ByteBuffer destination) {
destination.putInt(this.minorVersion);
destination.put(this.fileContext.shouldUseHBaseChecksum() ? (byte) 1 : (byte) 0);
destination.putLong(this.offset);
destination.putInt(this.nextBlockOnDiskSizeWithHeader);
destination.rewind();
@ -1696,10 +1641,6 @@ public class HFileBlock implements Cacheable {
return true;
}
public boolean doesIncludeMemstoreTS() {
return includesMemstoreTS;
}
public DataBlockEncoding getDataBlockEncoding() {
if (blockType == BlockType.ENCODED_DATA) {
return DataBlockEncoding.getEncodingById(getDataBlockEncodingId());
@ -1708,21 +1649,17 @@ public class HFileBlock implements Cacheable {
}
byte getChecksumType() {
return this.checksumType;
return this.fileContext.getChecksumType().getCode();
}
int getBytesPerChecksum() {
return this.bytesPerChecksum;
return this.fileContext.getBytesPerChecksum();
}
int getOnDiskDataSizeWithHeader() {
return this.onDiskDataSizeWithHeader;
}
int getMinorVersion() {
return this.minorVersion;
}
/**
* Calculate the number of bytes required to store all the checksums
* for this block. Each checksum value is a 4 byte integer.
@ -1732,44 +1669,48 @@ public class HFileBlock implements Cacheable {
// data to validate. Similarly, a zero value in this.bytesPerChecksum
// indicates that cached blocks do not have checksum data because
// checksums were already validated when the block was read from disk.
if (minorVersion < MINOR_VERSION_WITH_CHECKSUM || this.bytesPerChecksum == 0) {
if (!fileContext.shouldUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) {
return 0;
}
return (int)ChecksumUtil.numBytes(onDiskDataSizeWithHeader, bytesPerChecksum);
return (int)ChecksumUtil.numBytes(onDiskDataSizeWithHeader, this.fileContext.getBytesPerChecksum());
}
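In other words, the block reserves one 4-byte checksum per bytesPerChecksum-sized chunk of on-disk data, rounding the last partial chunk up. A standalone illustration of that arithmetic (it does not call ChecksumUtil; the 33-byte checksum-enabled header used in the example is spelled out after headerSize() below):

public class ChecksumSizeSketch {
  static final int CHECKSUM_SIZE = 4;  // each checksum value is a 4-byte integer

  // One checksum per bytesPerChecksum chunk of on-disk data, last partial chunk rounded up.
  static long totalChecksumBytes(long onDiskDataSizeWithHeader, int bytesPerChecksum) {
    long chunks = (onDiskDataSizeWithHeader + bytesPerChecksum - 1) / bytesPerChecksum;
    return chunks * CHECKSUM_SIZE;
  }

  public static void main(String[] args) {
    // A 64KB data block plus its 33-byte header, checksummed every 16KB: 5 chunks -> 20 bytes.
    System.out.println(totalChecksumBytes(64 * 1024 + 33, 16 * 1024));
  }
}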
/**
* Returns the size of this block header.
*/
public int headerSize() {
return headerSize(this.minorVersion);
return headerSize(this.fileContext.shouldUseHBaseChecksum());
}
/**
* Maps the use-HBase-checksum flag to the size of the header.
*/
public static int headerSize(int minorVersion) {
if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
public static int headerSize(boolean usesHBaseChecksum) {
if (usesHBaseChecksum) {
return HConstants.HFILEBLOCK_HEADER_SIZE;
}
return HConstants.HFILEBLOCK_HEADER_SIZE;
return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM;
}
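Spelled out, the two header sizes follow from the layout read back in the HFileBlock(ByteBuffer, boolean) constructor above: the block-type magic plus two ints and a long, plus one byte and two more ints when HBase checksums are in play. A quick check of that arithmetic (the 8-byte magic length is an assumption of this sketch):

public class HeaderSizeSketch {
  public static void main(String[] args) {
    int magic = 8;                          // block type magic, assumed to be 8 bytes
    int common = magic + 4 + 4 + 8;         // onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset
    int withChecksum = common + 1 + 4 + 4;  // checksumType, bytesPerChecksum, onDiskDataSizeWithHeader
    System.out.println(common);             // 24, i.e. the no-checksum header size
    System.out.println(withChecksum);       // 33, i.e. the checksum-enabled header size
  }
}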
/**
* Return the appropriate DUMMY_HEADER for the minor version
*/
public byte[] getDummyHeaderForVersion() {
return getDummyHeaderForVersion(minorVersion);
return getDummyHeaderForVersion(this.fileContext.shouldUseHBaseChecksum());
}
/**
* Return the appropriate DUMMY_HEADER for the minor version
*/
static private byte[] getDummyHeaderForVersion(int minorVersion) {
if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) {
return DUMMY_HEADER_NO_CHECKSUM;
static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) {
if (usesHBaseChecksum) {
return HConstants.HFILEBLOCK_DUMMY_HEADER;
}
return HConstants.HFILEBLOCK_DUMMY_HEADER;
return DUMMY_HEADER_NO_CHECKSUM;
}
public HFileContext getHFileContext() {
return this.fileContext;
}
/**

View File

@ -20,10 +20,9 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.util.Bytes;
/**
@ -47,6 +46,7 @@ public interface HFileDataBlockEncoder {
* </ul>
* @param block a block in an on-disk format (read from HFile or freshly
* generated).
* @param isCompaction whether this conversion is being done as part of a compaction
* @return non null block which is coded according to the settings.
*/
HFileBlock diskToCacheFormat(
@ -63,7 +63,6 @@ public interface HFileDataBlockEncoder {
*/
void beforeWriteToDisk(
ByteBuffer in,
boolean includesMemstoreTS,
HFileBlockEncodingContext encodingResult,
BlockType blockType
) throws IOException;
@ -100,24 +99,21 @@ public interface HFileDataBlockEncoder {
* encoding context should also perform compression if the compression algorithm
* in the given HFileContext is valid.
*
* @param compressionAlgorithm compression algorithm
* @param headerBytes header bytes
* @param fileContext HFile meta data
* @return a new {@link HFileBlockEncodingContext} object
*/
HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(
Algorithm compressionAlgorithm, byte[] headerBytes
);
HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(byte[] headerBytes,
HFileContext fileContext);
/**
* Create an encoder-specific decoding context for reading. The
* decoding context should also do decompression if the compression algorithm
* in the given HFileContext is valid.
*
* @param compressionAlgorithm
* @param fileContext - HFile meta data
* @return a new {@link HFileBlockDecodingContext} object
*/
HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(
Algorithm compressionAlgorithm
);
HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(HFileContext fileContext);
}

View File

@ -21,13 +21,12 @@ import java.nio.ByteBuffer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.util.Bytes;
@ -156,8 +155,8 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
return block;
}
// Encode the unencoded block with the in-cache encoding.
return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS(),
createInCacheEncodingContext());
return encodeDataBlock(block, inCache,
createInCacheEncodingContext(block.getHFileContext()));
}
if (block.getBlockType() == BlockType.ENCODED_DATA) {
@ -183,7 +182,6 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
*/
@Override
public void beforeWriteToDisk(ByteBuffer in,
boolean includesMemstoreTS,
HFileBlockEncodingContext encodeCtx,
BlockType blockType) throws IOException {
if (onDisk == DataBlockEncoding.NONE) {
@ -192,8 +190,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
in.array(), blockType);
return;
}
encodeBufferToHFileBlockBuffer(in, onDisk,
includesMemstoreTS, encodeCtx);
encodeBufferToHFileBlockBuffer(in, onDisk, encodeCtx);
}
@Override
@ -209,15 +206,13 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
*
* @param in input data to encode
* @param algo encoding algorithm
* @param includesMemstoreTS includes memstore timestamp or not
* @param encodeCtx where will the output data be stored
*/
private void encodeBufferToHFileBlockBuffer(ByteBuffer in,
DataBlockEncoding algo, boolean includesMemstoreTS,
private void encodeBufferToHFileBlockBuffer(ByteBuffer in, DataBlockEncoding algo,
HFileBlockEncodingContext encodeCtx) {
DataBlockEncoder encoder = algo.getEncoder();
try {
encoder.encodeKeyValues(in, includesMemstoreTS, encodeCtx);
encoder.encodeKeyValues(in, encodeCtx);
} catch (IOException e) {
throw new RuntimeException(String.format(
"Bug in data block encoder "
@ -227,12 +222,11 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
}
}
private HFileBlock encodeDataBlock(HFileBlock block,
DataBlockEncoding algo, boolean includesMemstoreTS,
private HFileBlock encodeDataBlock(HFileBlock block, DataBlockEncoding algo,
HFileBlockEncodingContext encodingCtx) {
encodingCtx.setDummyHeader(block.getDummyHeaderForVersion());
encodeBufferToHFileBlockBuffer(
block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx);
block.getBufferWithoutHeader(), algo, encodingCtx);
byte[] encodedUncompressedBytes =
encodingCtx.getUncompressedBytesWithHeader();
ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes);
@ -241,9 +235,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
block.getOnDiskSizeWithoutHeader(),
sizeWithoutHeader, block.getPrevBlockOffset(),
bufferWrapper, HFileBlock.FILL_HEADER, block.getOffset(),
includesMemstoreTS, block.getMinorVersion(),
block.getBytesPerChecksum(), block.getChecksumType(),
block.getOnDiskDataSizeWithHeader());
block.getOnDiskDataSizeWithHeader(), encodingCtx.getHFileContext());
return encodedBlock;
}
@ -253,14 +245,14 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
* See HBASE-8732
* @return a new in cache encoding context
*/
private HFileBlockEncodingContext createInCacheEncodingContext() {
private HFileBlockEncodingContext createInCacheEncodingContext(HFileContext meta) {
HFileContext newMeta = meta.clone();
return (inCache != DataBlockEncoding.NONE) ?
this.inCache.getEncoder().newDataBlockEncodingContext(
Algorithm.NONE, this.inCache, dummyHeader)
:
// create a default encoding context
new HFileBlockDefaultEncodingContext(Algorithm.NONE,
this.inCache, dummyHeader);
this.inCache.getEncoder().newDataBlockEncodingContext(
this.inCache, dummyHeader, newMeta)
:
// create a default encoding context
new HFileBlockDefaultEncodingContext(this.inCache, dummyHeader, newMeta);
}
@Override
@ -271,29 +263,25 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder {
@Override
public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(
Algorithm compressionAlgorithm, byte[] dummyHeader) {
byte[] dummyHeader, HFileContext fileContext) {
if (onDisk != null) {
DataBlockEncoder encoder = onDisk.getEncoder();
if (encoder != null) {
return encoder.newDataBlockEncodingContext(
compressionAlgorithm, onDisk, dummyHeader);
return encoder.newDataBlockEncodingContext(onDisk, dummyHeader, fileContext);
}
}
return new HFileBlockDefaultEncodingContext(compressionAlgorithm,
null, dummyHeader);
return new HFileBlockDefaultEncodingContext(null, dummyHeader, fileContext);
}
@Override
public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(
Algorithm compressionAlgorithm) {
public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(HFileContext fileContext) {
if (onDisk != null) {
DataBlockEncoder encoder = onDisk.getEncoder();
if (encoder != null) {
return encoder.newDataBlockDecodingContext(
compressionAlgorithm);
return encoder.newDataBlockDecodingContext(fileContext);
}
}
return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
return new HFileBlockDefaultDecodingContext(fileContext);
}
}

View File

@ -49,6 +49,7 @@ import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
import org.apache.hadoop.hbase.util.BloomFilter;
@ -275,6 +276,12 @@ public class HFilePrettyPrinter {
System.out.print("K: " + kv);
if (printValue) {
System.out.print(" V: " + Bytes.toStringBinary(kv.getValue()));
int i = 0;
List<Tag> tags = kv.getTags();
for (Tag tag : tags) {
System.out
.print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue())));
}
}
System.out.println();
}
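With the loop above, each cell printed with the value option simply gains one " T[i]: value" column per tag. A small sketch that renders tags the same way, plus the rough shape of the resulting line (tag contents here are made up):

import java.util.List;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.util.Bytes;

public class TagPrintSketch {
  // Mirrors the printer loop above; getTags() and getValue() are the accessors it uses.
  static String renderTags(KeyValue kv) {
    StringBuilder sb = new StringBuilder();
    List<Tag> tags = kv.getTags();
    for (int i = 0; i < tags.size(); i++) {
      sb.append(String.format(" T[%d]: %s", i, Bytes.toStringBinary(tags.get(i).getValue())));
    }
    // A printed cell then looks roughly like:
    // K: row1/cf:q1/1380000000000/Put/vlen=4/mvcc=0 V: val1 T[0]: acl-bytes T[1]: other-tag
    return sb.toString();
  }
}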

View File

@ -1,5 +1,4 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@ -27,15 +26,16 @@ import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.io.WritableUtils;
@ -50,21 +50,28 @@ public class HFileReaderV2 extends AbstractHFileReader {
private static final Log LOG = LogFactory.getLog(HFileReaderV2.class);
/** Minor versions in HFile V2 starting with this number have hbase checksums */
public static final int MINOR_VERSION_WITH_CHECKSUM = 1;
/** In HFile V2 minor version that does not support checksums */
public static final int MINOR_VERSION_NO_CHECKSUM = 0;
/** HFile minor version that introduced pbuf filetrailer */
public static final int PBUF_TRAILER_MINOR_VERSION = 2;
/**
* The size of a (key length, value length) tuple that prefixes each entry in
* a data block.
*/
private static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT;
private boolean includesMemstoreTS = false;
private boolean decodeMemstoreTS = false;
private boolean shouldIncludeMemstoreTS() {
protected boolean includesMemstoreTS = false;
protected boolean decodeMemstoreTS = false;
protected boolean shouldIncludeMemstoreTS() {
return includesMemstoreTS;
}
/** Filesystem-level block reader. */
private HFileBlock.FSReader fsBlockReader;
protected HFileBlock.FSReader fsBlockReader;
/**
* A "sparse lock" implementation allowing to lock on a particular block
@ -90,6 +97,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
/** Minor versions starting with this number have faked index key */
static final int MINOR_VERSION_WITH_FAKED_KEY = 3;
protected HFileContext hfileContext;
/**
* Opens a HFile. You must load the index before you can use it by calling
@ -103,16 +111,19 @@ public class HFileReaderV2 extends AbstractHFileReader {
* @param preferredEncodingInCache the encoding to use in cache in case we
* have a choice. If the file is already encoded on disk, we will
* still use its on-disk encoding in cache.
* @param hfs
*/
public HFileReaderV2(Path path, FixedFileTrailer trailer,
final FSDataInputStreamWrapper fsdis, final long size, final CacheConfig cacheConf,
DataBlockEncoding preferredEncodingInCache, final HFileSystem hfs)
throws IOException {
super(path, trailer, size, cacheConf, hfs);
trailer.expectMajorVersion(2);
trailer.expectMajorVersion(getMajorVersion());
validateMinorVersion(path, trailer.getMinorVersion());
HFileBlock.FSReaderV2 fsBlockReaderV2 = new HFileBlock.FSReaderV2(fsdis,
compressAlgo, fileSize, trailer.getMinorVersion(), hfs, path);
this.hfileContext = createHFileContext(trailer);
// Should we set the preferredEncodinginCache here for the context
HFileBlock.FSReaderV2 fsBlockReaderV2 = new HFileBlock.FSReaderV2(fsdis, fileSize, hfs, path,
hfileContext);
this.fsBlockReader = fsBlockReaderV2; // upcast
// Comparator class name is stored in the trailer in version 2.
@ -167,6 +178,15 @@ public class HFileReaderV2 extends AbstractHFileReader {
}
}
protected HFileContext createHFileContext(FixedFileTrailer trailer) {
HFileContext meta = new HFileContext();
meta.setIncludesMvcc(this.includesMemstoreTS);
meta.setUsesHBaseChecksum(
trailer.getMinorVersion() >= MINOR_VERSION_WITH_CHECKSUM);
meta.setCompressAlgo(this.compressAlgo);
return meta;
}
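A v3 reader can hook in here instead of re-deriving everything itself. A hypothetical override (a sketch only; the actual HFileReaderV3 further down may derive these flags differently) that additionally advertises tags and HBase checksums through the same HFileContext:

// Hypothetical HFileReaderV3 override; the setters are the ones used elsewhere in this patch.
@Override
protected HFileContext createHFileContext(FixedFileTrailer trailer) {
  HFileContext meta = new HFileContext();
  meta.setIncludesMvcc(this.includesMemstoreTS);
  meta.setUsesHBaseChecksum(true);   // assumption: v3 files always carry HBase checksums
  meta.setCompressAlgo(this.compressAlgo);
  meta.setIncludesTags(true);        // assumption: v3 blocks may carry tags
  return meta;
}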
/**
* Create a Scanner on this file. No seeks or reads are done on creation. Call
* {@link HFileScanner#seekTo(byte[])} to position and start the read. There is
@ -185,7 +205,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
// check if we want to use data block encoding in memory
if (dataBlockEncoder.useEncodedScanner(isCompaction)) {
return new EncodedScannerV2(this, cacheBlocks, pread, isCompaction,
includesMemstoreTS);
hfileContext);
}
return new ScannerV2(this, cacheBlocks, pread, isCompaction);
@ -338,7 +358,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
long startTimeNs = System.nanoTime();
HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, -1,
pread);
hfileBlock = dataBlockEncoder.diskToCacheFormat(hfileBlock, isCompaction);
hfileBlock = diskToCacheFormat(hfileBlock, isCompaction);
validateBlockType(hfileBlock, expectedBlockType);
final long delta = System.nanoTime() - startTimeNs;
@ -363,6 +383,10 @@ public class HFileReaderV2 extends AbstractHFileReader {
}
}
protected HFileBlock diskToCacheFormat( HFileBlock hfileBlock, final boolean isCompaction) {
return dataBlockEncoder.diskToCacheFormat(hfileBlock, isCompaction);
}
/**
* Compares the actual type of a block retrieved from cache or disk with its
* expected type and throws an exception in case of a mismatch. Expected
@ -612,16 +636,18 @@ public class HFileReaderV2 extends AbstractHFileReader {
if (!isSeeked())
return null;
KeyValue ret = new KeyValue(blockBuffer.array(),
blockBuffer.arrayOffset() + blockBuffer.position(),
KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen,
currKeyLen);
KeyValue ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
+ blockBuffer.position(), getKvBufSize(), currKeyLen);
if (this.reader.shouldIncludeMemstoreTS()) {
ret.setMvccVersion(currMemstoreTS);
}
return ret;
}
protected int getKvBufSize() {
return KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen;
}
@Override
public ByteBuffer getKey() {
assertSeeked();
@ -640,7 +666,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
+ KEY_VALUE_LEN_SIZE + currKeyLen, currValueLen).slice();
}
private void setNonSeekedState() {
protected void setNonSeekedState() {
block = null;
blockBuffer = null;
currKeyLen = 0;
@ -661,8 +687,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
assertSeeked();
try {
blockBuffer.position(blockBuffer.position() + KEY_VALUE_LEN_SIZE
+ currKeyLen + currValueLen + currMemstoreTSLen);
blockBuffer.position(getNextKVStartPosition());
} catch (IllegalArgumentException e) {
LOG.error("Current pos = " + blockBuffer.position()
+ "; currKeyLen = " + currKeyLen + "; currValLen = "
@ -697,6 +722,11 @@ public class HFileReaderV2 extends AbstractHFileReader {
return true;
}
protected int getNextKVStartPosition() {
return blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
+ currMemstoreTSLen;
}
/**
* Positions this scanner at the start of the file.
*
@ -753,7 +783,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
*
* @param newBlock the block to make current
*/
private void updateCurrBlock(HFileBlock newBlock) {
protected void updateCurrBlock(HFileBlock newBlock) {
block = newBlock;
// sanity check
@ -773,19 +803,29 @@ public class HFileReaderV2 extends AbstractHFileReader {
this.nextIndexedKey = null;
}
private final void readKeyValueLen() {
protected void readKeyValueLen() {
blockBuffer.mark();
currKeyLen = blockBuffer.getInt();
currValueLen = blockBuffer.getInt();
ByteBufferUtils.skip(blockBuffer, currKeyLen + currValueLen);
readMvccVersion();
if (currKeyLen < 0 || currValueLen < 0
|| currKeyLen > blockBuffer.limit()
|| currValueLen > blockBuffer.limit()) {
throw new IllegalStateException("Invalid currKeyLen " + currKeyLen
+ " or currValueLen " + currValueLen + ". Block offset: "
+ block.getOffset() + ", block length: " + blockBuffer.limit()
+ ", position: " + blockBuffer.position() + " (without header).");
}
blockBuffer.reset();
}
protected void readMvccVersion() {
if (this.reader.shouldIncludeMemstoreTS()) {
if (this.reader.decodeMemstoreTS) {
try {
int memstoreTSOffset = blockBuffer.arrayOffset()
+ blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen
+ currValueLen;
currMemstoreTS = Bytes.readVLong(blockBuffer.array(),
memstoreTSOffset);
currMemstoreTS = Bytes.readVLong(blockBuffer.array(), blockBuffer.arrayOffset()
+ blockBuffer.position());
currMemstoreTSLen = WritableUtils.getVIntSize(currMemstoreTS);
} catch (Exception e) {
throw new RuntimeException("Error reading memstore timestamp", e);
@ -795,15 +835,6 @@ public class HFileReaderV2 extends AbstractHFileReader {
currMemstoreTSLen = 1;
}
}
if (currKeyLen < 0 || currValueLen < 0
|| currKeyLen > blockBuffer.limit()
|| currValueLen > blockBuffer.limit()) {
throw new IllegalStateException("Invalid currKeyLen " + currKeyLen
+ " or currValueLen " + currValueLen + ". Block offset: "
+ block.getOffset() + ", block length: " + blockBuffer.limit()
+ ", position: " + blockBuffer.position() + " (without header).");
}
}
/**
@ -821,7 +852,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
* -2 in case of an inexact match where, in addition, the input key is less
* than the first key of the current block (e.g. when using a faked index key)
*/
private int blockSeek(byte[] key, int offset, int length,
protected int blockSeek(byte[] key, int offset, int length,
boolean seekBefore) {
int klen, vlen;
long memstoreTS = 0;
@ -931,34 +962,34 @@ public class HFileReaderV2 extends AbstractHFileReader {
*/
protected static class EncodedScannerV2 extends AbstractScannerV2 {
private DataBlockEncoder.EncodedSeeker seeker = null;
private DataBlockEncoder dataBlockEncoder = null;
private final boolean includesMemstoreTS;
protected DataBlockEncoder dataBlockEncoder = null;
protected final HFileContext meta;
protected HFileBlockDecodingContext decodingCtx;
public EncodedScannerV2(HFileReaderV2 reader, boolean cacheBlocks,
boolean pread, boolean isCompaction, boolean includesMemstoreTS) {
boolean pread, boolean isCompaction, HFileContext meta) {
super(reader, cacheBlocks, pread, isCompaction);
this.includesMemstoreTS = includesMemstoreTS;
this.meta = meta;
}
private void setDataBlockEncoder(DataBlockEncoder dataBlockEncoder) {
protected void setDataBlockEncoder(DataBlockEncoder dataBlockEncoder) {
this.dataBlockEncoder = dataBlockEncoder;
seeker = dataBlockEncoder.createSeeker(reader.getComparator(),
includesMemstoreTS);
decodingCtx = this.dataBlockEncoder.newDataBlockDecodingContext(
this.meta);
seeker = dataBlockEncoder.createSeeker(reader.getComparator(), decodingCtx);
}
/**
* Updates the current block to be the given {@link HFileBlock}. Seeks to
* the first key/value pair.
*
* @param newBlock the block to make current
*/
private void updateCurrentBlock(HFileBlock newBlock) {
protected void updateCurrentBlock(HFileBlock newBlock) {
block = newBlock;
// sanity checks
if (block.getBlockType() != BlockType.ENCODED_DATA) {
throw new IllegalStateException(
"EncodedScannerV2 works only on encoded data blocks");
"EncodedScanner works only on encoded data blocks");
}
short dataBlockEncoderId = block.getDataBlockEncodingId();
@ -1131,4 +1162,9 @@ public class HFileReaderV2 extends AbstractHFileReader {
throw new RuntimeException(msg);
}
}
@Override
public int getMajorVersion() {
return 2;
}
}

View File

@ -0,0 +1,276 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableUtils;
/**
* {@link HFile} reader for version 3.
* This Reader is aware of Tags.
*/
@InterfaceAudience.Private
public class HFileReaderV3 extends HFileReaderV2 {
public static final int MAX_MINOR_VERSION = 0;
/**
* Opens a HFile. You must load the index before you can use it by calling
* {@link #loadFileInfo()}.
* @param path
* Path to HFile.
* @param trailer
* File trailer.
* @param fsdis
* input stream.
* @param size
* Length of the stream.
* @param cacheConf
* Cache configuration.
* @param preferredEncodingInCache
* the encoding to use in cache in case we have a choice. If the file
* is already encoded on disk, we will still use its on-disk encoding
* in cache.
*/
public HFileReaderV3(Path path, FixedFileTrailer trailer, final FSDataInputStreamWrapper fsdis,
final long size, final CacheConfig cacheConf, DataBlockEncoding preferredEncodingInCache,
final HFileSystem hfs) throws IOException {
super(path, trailer, fsdis, size, cacheConf, preferredEncodingInCache, hfs);
}
@Override
protected HFileContext createHFileContext(FixedFileTrailer trailer) {
HFileContext meta = new HFileContext();
meta.setIncludesMvcc(this.includesMemstoreTS);
meta.setUsesHBaseChecksum(true);
meta.setCompressAlgo(this.compressAlgo);
meta.setIncludesTags(true);
return meta;
}
/**
* Create a Scanner on this file. No seeks or reads are done on creation. Call
* {@link HFileScanner#seekTo(byte[])} to position and start the read. There is
* nothing to clean up in a Scanner. Letting go of your references to the
* scanner is sufficient.
* @param cacheBlocks
* True if we should cache blocks read in by this scanner.
* @param pread
* Use positional read rather than seek+read if true (pread is better
* for random reads, seek+read is better for scanning).
* @param isCompaction
* is scanner being used for a compaction?
* @return Scanner on this file.
*/
@Override
public HFileScanner getScanner(boolean cacheBlocks, final boolean pread,
final boolean isCompaction) {
// check if we want to use data block encoding in memory
if (dataBlockEncoder.useEncodedScanner(isCompaction)) {
return new EncodedScannerV3(this, cacheBlocks, pread, isCompaction, this.hfileContext);
}
return new ScannerV3(this, cacheBlocks, pread, isCompaction);
}
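The scanner contract described above (create, seek, iterate, no explicit cleanup) can be exercised with a minimal sketch such as the following; the reader factory call, the path, and the assumption that hfile.format.version is 3 are illustrative, not part of this change:
// Minimal sketch, assuming a configured FileSystem `fs`, a Configuration `conf`,
// and an existing v3 HFile at `path`.
HFile.Reader reader = HFile.createReader(fs, path, new CacheConfig(conf));
reader.loadFileInfo();
HFileScanner scanner = reader.getScanner(false, true, false); // no caching, pread, not a compaction
if (scanner.seekTo()) {            // position at the first key/value of the file
  do {
    KeyValue kv = scanner.getKeyValue();
    // consume kv; for v3 files kv.getTagsOffset()/kv.getTagsLength() expose the tags
  } while (scanner.next());
}
reader.close();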
/**
* Implementation of {@link HFileScanner} interface.
*/
protected static class ScannerV3 extends ScannerV2 {
private HFileReaderV3 reader;
private int currTagsLen;
public ScannerV3(HFileReaderV3 r, boolean cacheBlocks, final boolean pread,
final boolean isCompaction) {
super(r, cacheBlocks, pread, isCompaction);
this.reader = r;
}
@Override
protected int getKvBufSize() {
int kvBufSize = super.getKvBufSize();
if (reader.hfileContext.shouldIncludeTags()) {
kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen;
}
return kvBufSize;
}
protected void setNonSeekedState() {
super.setNonSeekedState();
currTagsLen = 0;
}
@Override
protected int getNextKVStartPosition() {
int nextKvPos = super.getNextKVStartPosition();
if (reader.hfileContext.shouldIncludeTags()) {
nextKvPos += Bytes.SIZEOF_SHORT + currTagsLen;
}
return nextKvPos;
}
protected void readKeyValueLen() {
blockBuffer.mark();
currKeyLen = blockBuffer.getInt();
currValueLen = blockBuffer.getInt();
ByteBufferUtils.skip(blockBuffer, currKeyLen + currValueLen);
if (reader.hfileContext.shouldIncludeTags()) {
currTagsLen = blockBuffer.getShort();
ByteBufferUtils.skip(blockBuffer, currTagsLen);
}
readMvccVersion();
if (currKeyLen < 0 || currValueLen < 0 || currTagsLen < 0 || currKeyLen > blockBuffer.limit()
|| currValueLen > blockBuffer.limit() || currTagsLen > blockBuffer.limit()) {
throw new IllegalStateException("Invalid currKeyLen " + currKeyLen + " or currValueLen "
+ currValueLen + " or currTagLen " + currTagsLen + ". Block offset: "
+ block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
+ blockBuffer.position() + " (without header).");
}
blockBuffer.reset();
}
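For reference, the reads above imply the following per-cell layout in a v3 data block; the size expression is a sketch that restates getKvBufSize() and getNextKVStartPosition() in one place:
// Per-cell layout parsed by readKeyValueLen() in a v3 data block:
//   int    keyLen    (4 bytes)
//   int    valueLen  (4 bytes)
//   byte[] key       (keyLen bytes)
//   byte[] value     (valueLen bytes)
//   short  tagsLen   (2 bytes, only when the file includes tags)
//   byte[] tags      (tagsLen bytes, only when the file includes tags)
//   vlong  mvcc      (variable length, only when the file includes the memstore TS)
int cellSizeOnDisk = KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
    + (reader.hfileContext.shouldIncludeTags() ? Bytes.SIZEOF_SHORT + currTagsLen : 0)
    + currMemstoreTSLen;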
/**
* Within a loaded block, seek looking for the last key that is smaller than
* (or equal to) the key we are interested in.
* A note on seekBefore: if seekBefore is true and the first key in the block
* equals the given key, an exception is thrown. The caller has to check for
* that case and load the previous block as appropriate.
* @param key
* the key to find
* @param seekBefore
* find the key before the given key in case of exact match.
* @param offset
* Offset to find the key in the given bytebuffer
* @param length
* Length of the key to be found
* @return 0 in case of an exact key match, 1 in case of an inexact match,
* -2 in case of an inexact match where, furthermore, the input key is
* less than the first key of the current block (e.g. when using a faked
* index key)
*/
protected int blockSeek(byte[] key, int offset, int length, boolean seekBefore) {
int klen, vlen, tlen = 0;
long memstoreTS = 0;
int memstoreTSLen = 0;
int lastKeyValueSize = -1;
do {
blockBuffer.mark();
klen = blockBuffer.getInt();
vlen = blockBuffer.getInt();
ByteBufferUtils.skip(blockBuffer, klen + vlen);
if (reader.hfileContext.shouldIncludeTags()) {
tlen = blockBuffer.getShort();
ByteBufferUtils.skip(blockBuffer, tlen);
}
if (this.reader.shouldIncludeMemstoreTS()) {
if (this.reader.decodeMemstoreTS) {
try {
memstoreTS = Bytes.readVLong(blockBuffer.array(), blockBuffer.arrayOffset()
+ blockBuffer.position());
memstoreTSLen = WritableUtils.getVIntSize(memstoreTS);
} catch (Exception e) {
throw new RuntimeException("Error reading memstore timestamp", e);
}
} else {
memstoreTS = 0;
memstoreTSLen = 1;
}
}
blockBuffer.reset();
int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2);
int comp = reader.getComparator().compare(key, offset, length, blockBuffer.array(),
keyOffset, klen);
if (comp == 0) {
if (seekBefore) {
if (lastKeyValueSize < 0) {
throw new IllegalStateException("blockSeek with seekBefore "
+ "at the first key of the block: key=" + Bytes.toStringBinary(key)
+ ", blockOffset=" + block.getOffset() + ", onDiskSize="
+ block.getOnDiskSizeWithHeader());
}
blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
readKeyValueLen();
return 1; // non exact match.
}
currKeyLen = klen;
currValueLen = vlen;
currTagsLen = tlen;
if (this.reader.shouldIncludeMemstoreTS()) {
currMemstoreTS = memstoreTS;
currMemstoreTSLen = memstoreTSLen;
}
return 0; // indicate exact match
} else if (comp < 0) {
if (lastKeyValueSize > 0)
blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
readKeyValueLen();
if (lastKeyValueSize == -1 && blockBuffer.position() == 0) {
return HConstants.INDEX_KEY_MAGIC;
}
return 1;
}
// The size of this key/value tuple, including key/value length fields.
lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE;
// include tag length also if tags included with KV
if (reader.hfileContext.shouldIncludeTags()) {
lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT;
}
blockBuffer.position(blockBuffer.position() + lastKeyValueSize);
} while (blockBuffer.remaining() > 0);
// Seek to the last key we successfully read. This will happen if this is
// the last key/value pair in the file, in which case the following call
// to next() has to return false.
blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
readKeyValueLen();
return 1; // didn't exactly find it.
}
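A caller of blockSeek would interpret its return codes roughly as follows (a sketch; the real handling lives in the seek path of AbstractScannerV2):
int result = blockSeek(key, offset, length, false);
if (result == 0) {
  // exact match: the scanner is positioned on the requested key
} else if (result == HConstants.INDEX_KEY_MAGIC) {
  // inexact match and the key is smaller than the first key of this block
  // (the faked index key case from the javadoc above)
} else {
  // inexact match: positioned on the last key <= the requested key
}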
}
/**
* ScannerV3 that operates on encoded data blocks.
*/
protected static class EncodedScannerV3 extends EncodedScannerV2 {
public EncodedScannerV3(HFileReaderV3 reader, boolean cacheBlocks, boolean pread,
boolean isCompaction, HFileContext meta) {
super(reader, cacheBlocks, pread, isCompaction, meta);
}
}
@Override
public int getMajorVersion() {
return 3;
}
@Override
protected HFileBlock diskToCacheFormat(HFileBlock hfileBlock, final boolean isCompaction) {
return dataBlockEncoder.diskToCacheFormat(hfileBlock, isCompaction);
}
}

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
import org.apache.hadoop.hbase.util.ChecksumType;
@ -66,7 +67,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
new ArrayList<InlineBlockWriter>();
/** Unified version 2 block writer */
private HFileBlock.Writer fsBlockWriter;
protected HFileBlock.Writer fsBlockWriter;
private HFileBlockIndex.BlockIndexWriter dataBlockIndexWriter;
private HFileBlockIndex.BlockIndexWriter metaBlockIndexWriter;
@ -75,7 +76,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
private long firstDataBlockOffset = -1;
/** The offset of the last data block or 0 if the file is empty. */
private long lastDataBlockOffset;
protected long lastDataBlockOffset;
/** The last(stop) Key of the previous data block. */
private byte[] lastKeyOfPreviousBlock = null;
@ -84,12 +85,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
private List<BlockWritable> additionalLoadOnOpenData =
new ArrayList<BlockWritable>();
/** Checksum related settings */
private ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
private int bytesPerChecksum = HFile.DEFAULT_BYTES_PER_CHECKSUM;
private final boolean includeMemstoreTS;
private long maxMemstoreTS = 0;
protected long maxMemstoreTS = 0;
static class WriterFactoryV2 extends HFile.WriterFactory {
WriterFactoryV2(Configuration conf, CacheConfig cacheConf) {
@ -97,39 +93,30 @@ public class HFileWriterV2 extends AbstractHFileWriter {
}
@Override
public Writer createWriter(FileSystem fs, Path path,
FSDataOutputStream ostream, int blockSize,
Compression.Algorithm compress, HFileDataBlockEncoder blockEncoder,
final KVComparator comparator, final ChecksumType checksumType,
final int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException {
return new HFileWriterV2(conf, cacheConf, fs, path, ostream, blockSize, compress,
blockEncoder, comparator, checksumType, bytesPerChecksum, includeMVCCReadpoint);
public Writer createWriter(FileSystem fs, Path path,
FSDataOutputStream ostream,
KVComparator comparator, HFileContext context) throws IOException {
return new HFileWriterV2(conf, cacheConf, fs, path, ostream,
comparator, context);
}
}
}
/** Constructor that takes a path, creates and closes the output stream. */
public HFileWriterV2(Configuration conf, CacheConfig cacheConf,
FileSystem fs, Path path, FSDataOutputStream ostream, int blockSize,
Compression.Algorithm compressAlgo, HFileDataBlockEncoder blockEncoder,
final KVComparator comparator, final ChecksumType checksumType,
final int bytesPerChecksum, final boolean includeMVCCReadpoint) throws IOException {
FileSystem fs, Path path, FSDataOutputStream ostream,
final KVComparator comparator, final HFileContext context) throws IOException {
super(cacheConf,
ostream == null ? createOutputStream(conf, fs, path, null) : ostream,
path, blockSize, compressAlgo, blockEncoder, comparator);
this.checksumType = checksumType;
this.bytesPerChecksum = bytesPerChecksum;
this.includeMemstoreTS = includeMVCCReadpoint;
path, comparator, context);
finishInit(conf);
}
/** Additional initialization steps */
private void finishInit(final Configuration conf) {
protected void finishInit(final Configuration conf) {
if (fsBlockWriter != null)
throw new IllegalStateException("finishInit called twice");
// HFile filesystem-level (non-caching) block writer
fsBlockWriter = new HFileBlock.Writer(compressAlgo, blockEncoder,
includeMemstoreTS, checksumType, bytesPerChecksum);
fsBlockWriter = createBlockWriter();
// Data block index writer
boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite();
@ -145,13 +132,21 @@ public class HFileWriterV2 extends AbstractHFileWriter {
if (LOG.isTraceEnabled()) LOG.trace("Initialized with " + cacheConf);
}
protected HFileBlock.Writer createBlockWriter() {
// HFile filesystem-level (non-caching) block writer
hFileContext.setIncludesTags(false);
// This can be set when the writer is created because
// in both cases useHBaseChecksum is going to be true
hFileContext.setUsesHBaseChecksum(true);
return new HFileBlock.Writer(blockEncoder, hFileContext);
}
/**
* At a block boundary, write all the inline blocks and opens new block.
*
* @throws IOException
*/
private void checkBlockBoundary() throws IOException {
if (fsBlockWriter.blockSizeWritten() < blockSize)
protected void checkBlockBoundary() throws IOException {
if (fsBlockWriter.blockSizeWritten() < hFileContext.getBlocksize())
return;
finishBlock();
@ -224,7 +219,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
*
* @throws IOException
*/
private void newBlock() throws IOException {
protected void newBlock() throws IOException {
// This is where the next block begins.
fsBlockWriter.startWriting(BlockType.DATA);
firstKeyInBlock = null;
@ -303,8 +298,8 @@ public class HFileWriterV2 extends AbstractHFileWriter {
* @param vlength
* @throws IOException
*/
private void append(final long memstoreTS, final byte[] key, final int koffset, final int klength,
final byte[] value, final int voffset, final int vlength)
protected void append(final long memstoreTS, final byte[] key, final int koffset,
final int klength, final byte[] value, final int voffset, final int vlength)
throws IOException {
boolean dupKey = checkKey(key, koffset, klength);
checkValue(value, voffset, vlength);
@ -325,7 +320,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
totalValueLength += vlength;
out.write(key, koffset, klength);
out.write(value, voffset, vlength);
if (this.includeMemstoreTS) {
if (this.hFileContext.shouldIncludeMvcc()) {
WritableUtils.writeVLong(out, memstoreTS);
}
}
@ -356,8 +351,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
finishBlock();
writeInlineBlocks(true);
FixedFileTrailer trailer = new FixedFileTrailer(2,
HFileReaderV2.MAX_MINOR_VERSION);
FixedFileTrailer trailer = new FixedFileTrailer(getMajorVersion(), getMinorVersion());
// Write out the metadata blocks if any.
if (!metaNames.isEmpty()) {
@ -395,7 +389,7 @@ public class HFileWriterV2 extends AbstractHFileWriter {
fsBlockWriter.writeHeaderAndData(outputStream);
totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader();
if (this.includeMemstoreTS) {
if (this.hFileContext.shouldIncludeMvcc()) {
appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS));
appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE));
}
@ -466,4 +460,17 @@ public class HFileWriterV2 extends AbstractHFileWriter {
}
});
}
@Override
public void append(byte[] key, byte[] value, byte[] tag) throws IOException {
throw new UnsupportedOperationException("KV tags are supported only from HFile V3");
}
protected int getMajorVersion() {
return 2;
}
protected int getMinorVersion() {
return HFileReaderV2.MAX_MINOR_VERSION;
}
}

View File

@ -0,0 +1,202 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableUtils;
/**
* This is an extension of HFileWriterV2 that is tag aware.
*/
@InterfaceAudience.Private
public class HFileWriterV3 extends HFileWriterV2 {
// TODO: Use this to track the max tags length
private int maxTagsLength = 0;
static class WriterFactoryV3 extends HFile.WriterFactory {
WriterFactoryV3(Configuration conf, CacheConfig cacheConf) {
super(conf, cacheConf);
}
@Override
public Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream,
final KVComparator comparator, HFileContext fileContext)
throws IOException {
return new HFileWriterV3(conf, cacheConf, fs, path, ostream, comparator, fileContext);
}
}
/** Constructor that takes a path, creates and closes the output stream. */
public HFileWriterV3(Configuration conf, CacheConfig cacheConf, FileSystem fs, Path path,
FSDataOutputStream ostream, final KVComparator comparator,
final HFileContext fileContext) throws IOException {
super(conf, cacheConf, fs, path, ostream, comparator, fileContext);
}
/**
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
*
* @param kv
* KeyValue to add. Cannot be empty nor null.
* @throws IOException
*/
@Override
public void append(final KeyValue kv) throws IOException {
// Currently get the complete arrays
append(kv.getMvccVersion(), kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength(),
kv.getBuffer(), kv.getValueOffset(), kv.getValueLength(), kv.getBuffer(),
kv.getTagsOffset(), kv.getTagsLength());
this.maxMemstoreTS = Math.max(this.maxMemstoreTS, kv.getMvccVersion());
}
/**
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
* @param key
* Key to add. Cannot be empty nor null.
* @param value
* Value to add. Cannot be empty nor null.
* @throws IOException
*/
@Override
public void append(final byte[] key, final byte[] value) throws IOException {
append(key, value, HConstants.EMPTY_BYTE_ARRAY);
}
/**
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
* @param key
* Key to add. Cannot be empty nor null.
* @param value
* Value to add. Cannot be empty nor null.
* @param tag
* Tag to add. Cannot be empty or null.
* @throws IOException
*/
@Override
public void append(final byte[] key, final byte[] value, byte[] tag) throws IOException {
append(0, key, 0, key.length, value, 0, value.length, tag, 0, tag.length);
}
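A short usage sketch for the tag-aware append above; the path, the tag bytes, and the assumption that hfile.format.version is set to 3 (so the factory hands back a v3 writer) are illustrative:
HFileContext context = new HFileContext();
context.setIncludesTags(true);
HFile.Writer w = HFile.getWriterFactoryNoCache(conf)
    .withPath(fs, new Path("/tmp/example_v3_hfile"))    // illustrative path
    .withFileContext(context)
    .create();
try {
  byte[] tag = Bytes.toBytes("illustrative-tag-bytes"); // serialized Tag bytes in practice
  w.append(Bytes.toBytes("row1"), Bytes.toBytes("value1"), tag);
  w.append(Bytes.toBytes("row2"), Bytes.toBytes("value2"), HConstants.EMPTY_BYTE_ARRAY);
} finally {
  w.close();
}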
/**
* Add key/value to file. Keys must be added in an order that agrees with the
* Comparator passed on construction.
* @param key
* @param koffset
* @param klength
* @param value
* @param voffset
* @param vlength
* @param tag
* @param tagsOffset
* @param tagsLength
* @throws IOException
*/
private void append(final long memstoreTS, final byte[] key, final int koffset,
final int klength, final byte[] value, final int voffset, final int vlength,
final byte[] tag, final int tagsOffset, final int tagsLength) throws IOException {
boolean dupKey = checkKey(key, koffset, klength);
checkValue(value, voffset, vlength);
if (!dupKey) {
checkBlockBoundary();
}
if (!fsBlockWriter.isWriting())
newBlock();
// Write length of key and value and then actual key and value bytes.
// Additionally, we may also write down the memstoreTS.
{
DataOutputStream out = fsBlockWriter.getUserDataStream();
out.writeInt(klength);
totalKeyLength += klength;
out.writeInt(vlength);
totalValueLength += vlength;
out.write(key, koffset, klength);
out.write(value, voffset, vlength);
// Write the additional tag into the stream
if (hFileContext.shouldIncludeTags()) {
out.writeShort((short) tagsLength);
if (tagsLength > 0) {
out.write(tag, tagsOffset, tagsLength);
if (tagsLength > maxTagsLength) {
maxTagsLength = tagsLength;
}
}
}
if (this.hFileContext.shouldIncludeMvcc()) {
WritableUtils.writeVLong(out, memstoreTS);
}
}
// Are we the first key in this block?
if (firstKeyInBlock == null) {
// Copy the key.
firstKeyInBlock = new byte[klength];
System.arraycopy(key, koffset, firstKeyInBlock, 0, klength);
}
lastKeyBuffer = key;
lastKeyOffset = koffset;
lastKeyLength = klength;
entryCount++;
}
protected void finishFileInfo() throws IOException {
super.finishFileInfo();
if (hFileContext.shouldIncludeTags()) {
// When tags are not being written in this file, MAX_TAGS_LEN is excluded
// from the FileInfo
fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false);
}
}
@Override
protected HFileBlock.Writer createBlockWriter() {
// HFile filesystem-level (non-caching) block writer
hFileContext.setIncludesTags(true);
hFileContext.setUsesHBaseChecksum(true);
return new HFileBlock.Writer(blockEncoder, hFileContext);
}
@Override
protected int getMajorVersion() {
return 3;
}
@Override
protected int getMinorVersion() {
return HFileReaderV3.MAX_MINOR_VERSION;
}
}

View File

@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
/**
* Does not perform any kind of encoding/decoding.
@ -50,7 +51,6 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder {
@Override
public void beforeWriteToDisk(ByteBuffer in,
boolean includesMemstoreTS,
HFileBlockEncodingContext encodeCtx, BlockType blockType)
throws IOException {
if (!(encodeCtx.getClass().getName().equals(
@ -95,15 +95,13 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder {
@Override
public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(
Algorithm compressionAlgorithm, byte[] dummyHeader) {
return new HFileBlockDefaultEncodingContext(compressionAlgorithm,
null, dummyHeader);
byte[] dummyHeader, HFileContext meta) {
return new HFileBlockDefaultEncodingContext(null, dummyHeader, meta);
}
@Override
public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(
Algorithm compressionAlgorithm) {
return new HFileBlockDefaultDecodingContext(compressionAlgorithm);
public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(HFileContext meta) {
return new HFileBlockDefaultDecodingContext(meta);
}
}

View File

@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;
@ -106,20 +107,7 @@ public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable,
final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);
String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
final HFileDataBlockEncoder encoder;
if (dataBlockEncodingStr == null) {
encoder = NoOpDataBlockEncoder.INSTANCE;
} else {
try {
encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding
.valueOf(dataBlockEncodingStr));
} catch (IllegalArgumentException ex) {
throw new RuntimeException(
"Invalid data block encoding type configured for the param "
+ DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
}
}
final String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
return new RecordWriter<ImmutableBytesWritable, KeyValue>() {
// Map of families to writers and how much has been output on the writer.
@ -206,14 +194,18 @@ public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable,
: Integer.parseInt(blockSizeString);
Configuration tempConf = new Configuration(conf);
tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs, blockSize)
.withOutputDir(familydir)
.withCompression(AbstractHFileWriter.compressionByName(compression))
.withBloomType(bloomType)
.withComparator(KeyValue.COMPARATOR)
.withDataBlockEncoder(encoder)
.withChecksumType(HStore.getChecksumType(conf))
.withBytesPerChecksum(HStore.getBytesPerChecksum(conf))
HFileContext meta = new HFileContext();
meta.setCompressAlgo(AbstractHFileWriter.compressionByName(compression));
meta.setChecksumType(HStore.getChecksumType(conf));
meta.setBytesPerChecksum(HStore.getBytesPerChecksum(conf));
meta.setBlocksize(blockSize);
if (dataBlockEncodingStr != null) {
meta.setEncodingInCache(DataBlockEncoding.valueOf(dataBlockEncodingStr));
meta.setEncodingOnDisk(DataBlockEncoding.valueOf(dataBlockEncodingStr));
}
wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
.withOutputDir(familydir).withBloomType(bloomType).withComparator(KeyValue.COMPARATOR)
.withFileContext(meta)
.build();
this.writers.put(family, wl);

View File

@ -51,12 +51,12 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
@ -70,8 +70,7 @@ import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.BloomType;
@ -646,9 +645,6 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
CacheConfig cacheConf = new CacheConfig(conf);
HalfStoreFileReader halfReader = null;
StoreFile.Writer halfWriter = null;
HFileDataBlockEncoder dataBlockEncoder = new HFileDataBlockEncoderImpl(
familyDescriptor.getDataBlockEncodingOnDisk(),
familyDescriptor.getDataBlockEncoding());
try {
halfReader = new HalfStoreFileReader(fs, inFile, cacheConf,
reference, DataBlockEncoding.NONE);
@ -658,14 +654,18 @@ public class LoadIncrementalHFiles extends Configured implements Tool {
Algorithm compression = familyDescriptor.getCompression();
BloomType bloomFilterType = familyDescriptor.getBloomFilterType();
HFileContext meta = new HFileContext();
meta.setCompressAlgo(compression);
meta.setChecksumType(HStore.getChecksumType(conf));
meta.setBytesPerChecksum(HStore.getBytesPerChecksum(conf));
meta.setBlocksize(blocksize);
meta.setEncodingInCache(familyDescriptor.getDataBlockEncoding());
meta.setEncodingOnDisk(familyDescriptor.getDataBlockEncodingOnDisk());
halfWriter = new StoreFile.WriterBuilder(conf, cacheConf,
fs, blocksize)
fs)
.withFilePath(outFile)
.withCompression(compression)
.withDataBlockEncoder(dataBlockEncoder)
.withBloomType(bloomFilterType)
.withChecksumType(HStore.getChecksumType(conf))
.withBytesPerChecksum(HStore.getBytesPerChecksum(conf))
.withFileContext(meta)
.build();
HFileScanner scanner = halfReader.getScanner(false, false, false);
scanner.seekTo();

View File

@ -26,7 +26,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.UUID;
import org.apache.hadoop.classification.InterfaceAudience;
@ -42,7 +41,6 @@ import org.apache.hadoop.hbase.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.regionserver.wal.HLog;
import org.apache.hadoop.hbase.regionserver.wal.HLogKey;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import com.google.protobuf.ByteString;

View File

@ -73,7 +73,7 @@ public class DefaultStoreFlusher extends StoreFlusher {
status.setStatus("Flushing " + store + ": creating writer");
// Write the map out to the disk
writer = store.createWriterInTmp(
snapshot.size(), store.getFamily().getCompression(), false, true);
snapshot.size(), store.getFamily().getCompression(), false, true, true);
writer.setTimeRangeTracker(snapshotTimeRangeTracker);
try {
flushed = performFlush(scanner, writer, smallestReadPoint);

View File

@ -47,8 +47,8 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Threads;
/**

View File

@ -58,6 +58,7 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
@ -776,11 +777,13 @@ public class HStore implements Store {
* @param maxKeyCount
* @param compression Compression algorithm to use
* @param isCompaction whether we are creating a new file in a compaction
* @param includesMVCCReadPoint - whether to include MVCC or not
* @param includesTag - includesTag or not
* @return Writer for a new StoreFile in the tmp dir.
*/
@Override
public StoreFile.Writer createWriterInTmp(long maxKeyCount,
Compression.Algorithm compression, boolean isCompaction, boolean includeMVCCReadpoint)
public StoreFile.Writer createWriterInTmp(long maxKeyCount, Compression.Algorithm compression,
boolean isCompaction, boolean includeMVCCReadpoint, boolean includesTag)
throws IOException {
final CacheConfig writerCacheConf;
if (isCompaction) {
@ -795,21 +798,36 @@ public class HStore implements Store {
favoredNodes = region.getRegionServerServices().getFavoredNodesForRegion(
region.getRegionInfo().getEncodedName());
}
HFileContext hFileContext = createFileContext(compression, includeMVCCReadpoint, includesTag);
StoreFile.Writer w = new StoreFile.WriterBuilder(conf, writerCacheConf,
this.getFileSystem(), blocksize)
this.getFileSystem())
.withFilePath(fs.createTempName())
.withDataBlockEncoder(dataBlockEncoder)
.withComparator(comparator)
.withBloomType(family.getBloomFilterType())
.withMaxKeyCount(maxKeyCount)
.withChecksumType(checksumType)
.withBytesPerChecksum(bytesPerChecksum)
.withCompression(compression)
.withFavoredNodes(favoredNodes)
.includeMVCCReadpoint(includeMVCCReadpoint)
.withFileContext(hFileContext)
.build();
return w;
}
private HFileContext createFileContext(Compression.Algorithm compression,
boolean includeMVCCReadpoint, boolean includesTag) {
HFileContext hFileContext = new HFileContext();
hFileContext.setIncludesMvcc(includeMVCCReadpoint);
hFileContext.setIncludesTags(includesTag);
if (compression == null) {
compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
}
hFileContext.setCompressAlgo(compression);
hFileContext.setChecksumType(checksumType);
hFileContext.setBytesPerChecksum(bytesPerChecksum);
hFileContext.setBlocksize(blocksize);
hFileContext.setEncodingInCache(family.getDataBlockEncoding());
hFileContext.setEncodingOnDisk(family.getDataBlockEncodingOnDisk());
return hFileContext;
}
/*
* Change storeFiles adding into place the Reader produced by this new flush.

View File

@ -163,7 +163,8 @@ public interface Store extends HeapSize, StoreConfigInformation {
long maxKeyCount,
Compression.Algorithm compression,
boolean isCompaction,
boolean includeMVCCReadpoint
boolean includeMVCCReadpoint,
boolean includesTags
) throws IOException;
// Compaction oriented methods

View File

@ -43,11 +43,11 @@ import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValue.KVComparator;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.io.hfile.HFileWriterV2;
@ -59,7 +59,6 @@ import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.WritableUtils;
import com.google.common.base.Function;
@ -524,28 +523,19 @@ public class StoreFile {
private final Configuration conf;
private final CacheConfig cacheConf;
private final FileSystem fs;
private final int blockSize;
private Compression.Algorithm compressAlgo =
HFile.DEFAULT_COMPRESSION_ALGORITHM;
private HFileDataBlockEncoder dataBlockEncoder =
NoOpDataBlockEncoder.INSTANCE;
private KeyValue.KVComparator comparator = KeyValue.COMPARATOR;
private BloomType bloomType = BloomType.NONE;
private long maxKeyCount = 0;
private Path dir;
private Path filePath;
private InetSocketAddress[] favoredNodes;
private ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE;
private int bytesPerChecksum = HFile.DEFAULT_BYTES_PER_CHECKSUM;
private boolean includeMVCCReadpoint = true;
private HFileContext fileContext;
public WriterBuilder(Configuration conf, CacheConfig cacheConf,
FileSystem fs, int blockSize) {
FileSystem fs) {
this.conf = conf;
this.cacheConf = cacheConf;
this.fs = fs;
this.blockSize = blockSize;
}
/**
@ -572,12 +562,6 @@ public class StoreFile {
return this;
}
public WriterBuilder withCompression(Compression.Algorithm compressAlgo) {
Preconditions.checkNotNull(compressAlgo);
this.compressAlgo = compressAlgo;
return this;
}
/**
* @param favoredNodes an array of favored nodes or possibly null
* @return this (for chained invocation)
@ -587,12 +571,6 @@ public class StoreFile {
return this;
}
public WriterBuilder withDataBlockEncoder(HFileDataBlockEncoder encoder) {
Preconditions.checkNotNull(encoder);
this.dataBlockEncoder = encoder;
return this;
}
public WriterBuilder withComparator(KeyValue.KVComparator comparator) {
Preconditions.checkNotNull(comparator);
this.comparator = comparator;
@ -614,33 +592,10 @@ public class StoreFile {
return this;
}
/**
* @param checksumType the type of checksum
* @return this (for chained invocation)
*/
public WriterBuilder withChecksumType(ChecksumType checksumType) {
this.checksumType = checksumType;
public WriterBuilder withFileContext(HFileContext fileContext) {
this.fileContext = fileContext;
return this;
}
/**
* @param bytesPerChecksum the number of bytes per checksum chunk
* @return this (for chained invocation)
*/
public WriterBuilder withBytesPerChecksum(int bytesPerChecksum) {
this.bytesPerChecksum = bytesPerChecksum;
return this;
}
/**
* @param includeMVCCReadpoint whether to write the mvcc readpoint to the file for each KV
* @return this (for chained invocation)
*/
public WriterBuilder includeMVCCReadpoint(boolean includeMVCCReadpoint) {
this.includeMVCCReadpoint = includeMVCCReadpoint;
return this;
}
/**
* Create a store file writer. Client is responsible for closing file when
* done. If metadata, add BEFORE closing using
@ -667,15 +622,11 @@ public class StoreFile {
}
}
if (compressAlgo == null) {
compressAlgo = HFile.DEFAULT_COMPRESSION_ALGORITHM;
}
if (comparator == null) {
comparator = KeyValue.COMPARATOR;
}
return new Writer(fs, filePath, blockSize, compressAlgo, dataBlockEncoder,
conf, cacheConf, comparator, bloomType, maxKeyCount, checksumType,
bytesPerChecksum, includeMVCCReadpoint, favoredNodes);
return new Writer(fs, filePath,
conf, cacheConf, comparator, bloomType, maxKeyCount, favoredNodes, fileContext);
}
}
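With compression, checksum, block size and encoding knobs removed from the builder, callers now package them into an HFileContext first. A sketch of the new contract, with illustrative values and an assumed `conf`, `fs` and `path`:
HFileContext meta = new HFileContext();
meta.setCompressAlgo(Compression.Algorithm.GZ);   // illustrative settings
meta.setBlocksize(64 * 1024);
meta.setChecksumType(ChecksumType.CRC32);
meta.setBytesPerChecksum(16 * 1024);
meta.setIncludesTags(true);
StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs)
    .withFilePath(path)
    .withBloomType(BloomType.NONE)
    .withComparator(KeyValue.COMPARATOR)
    .withFileContext(meta)
    .build();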
@ -747,7 +698,6 @@ public class StoreFile {
private KeyValue lastDeleteFamilyKV = null;
private long deleteFamilyCnt = 0;
protected HFileDataBlockEncoder dataBlockEncoder;
/** Checksum type */
protected ChecksumType checksumType;
@ -770,39 +720,26 @@ public class StoreFile {
* Creates an HFile.Writer that also write helpful meta data.
* @param fs file system to write to
* @param path file name to create
* @param blocksize HDFS block size
* @param compress HDFS block compression
* @param conf user configuration
* @param comparator key comparator
* @param bloomType bloom filter setting
* @param maxKeys the expected maximum number of keys to be added. Was used
* for Bloom filter size in {@link HFile} format version 1.
* @param checksumType the checksum type
* @param bytesPerChecksum the number of bytes per checksum value
* @param includeMVCCReadpoint whether to write the mvcc readpoint to the file for each KV
* @param favoredNodes
* @param fileContext - The HFile context
* @throws IOException problem writing to FS
*/
private Writer(FileSystem fs, Path path, int blocksize,
Compression.Algorithm compress,
HFileDataBlockEncoder dataBlockEncoder, final Configuration conf,
private Writer(FileSystem fs, Path path,
final Configuration conf,
CacheConfig cacheConf,
final KVComparator comparator, BloomType bloomType, long maxKeys,
final ChecksumType checksumType, final int bytesPerChecksum,
final boolean includeMVCCReadpoint, InetSocketAddress[] favoredNodes)
InetSocketAddress[] favoredNodes, HFileContext fileContext)
throws IOException {
this.dataBlockEncoder = dataBlockEncoder != null ?
dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
writer = HFile.getWriterFactory(conf, cacheConf)
.withPath(fs, path)
.withBlockSize(blocksize)
.withCompression(compress)
.withDataBlockEncoder(this.dataBlockEncoder)
.withComparator(comparator)
.withChecksumType(checksumType)
.withBytesPerChecksum(bytesPerChecksum)
.withFavoredNodes(favoredNodes)
.includeMVCCReadpoint(includeMVCCReadpoint)
.withFileContext(fileContext)
.create();
this.kvComparator = comparator;
@ -833,8 +770,6 @@ public class StoreFile {
if (LOG.isTraceEnabled()) LOG.trace("Delete Family Bloom filter type for " + path + ": "
+ deleteFamilyBloomFilterWriter.getClass().getSimpleName());
}
this.checksumType = checksumType;
this.bytesPerChecksum = bytesPerChecksum;
}
/**

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.CellOutputStream;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
import org.apache.hadoop.hbase.io.hfile.HFileWriterV2;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
@ -118,6 +119,8 @@ public abstract class Compactor {
public long maxSeqId = 0;
/** Latest memstore read point found in any of the involved files */
public long maxMVCCReadpoint = 0;
/** Max tags length. */
public int maxTagsLength = 0;
}
protected FileDetails getFileDetails(
@ -143,6 +146,10 @@ public abstract class Compactor {
if (tmp != null) {
fd.maxMVCCReadpoint = Math.max(fd.maxMVCCReadpoint, Bytes.toLong(tmp));
}
tmp = fileInfo.get(FileInfo.MAX_TAGS_LEN);
if (tmp != null) {
fd.maxTagsLength = Math.max(fd.maxTagsLength, Bytes.toInt(tmp));
}
// If required, calculate the earliest put timestamp of all involved storefiles.
// This is used to remove family delete marker during compaction.
long earliestPutTs = 0;

View File

@ -71,7 +71,7 @@ public class DefaultCompactor extends Compactor {
// Create the writer even if no kv(Empty store file is also ok),
// because we need record the max seq id for the store file, see HBASE-6059
writer = store.createWriterInTmp(fd.maxKeyCount, this.compactionCompression, true,
fd.maxMVCCReadpoint >= smallestReadPoint);
fd.maxMVCCReadpoint >= smallestReadPoint, fd.maxTagsLength > 0);
boolean finished = performCompaction(scanner, writer, smallestReadPoint);
if (!finished) {
abortWriter(writer);

View File

@ -290,6 +290,9 @@ public class HLogPrettyPrinter {
+ op.get("qualifier"));
out.println(" timestamp: "
+ (new Date((Long) op.get("timestamp"))));
if (op.get("tag") != null) {
out.println(" tag: " + op.get("tag"));
}
if (outputValues)
out.println(" value: " + op.get("value"));
}

View File

@ -50,7 +50,8 @@ class KeyValueCompression {
throws IOException {
int keylength = WritableUtils.readVInt(in);
int vlength = WritableUtils.readVInt(in);
int length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength;
int tagsLength = WritableUtils.readVInt(in);
int length = (int) KeyValue.getKeyValueDataStructureSize(keylength, vlength, tagsLength);
byte[] backingArray = new byte[length];
int pos = 0;
@ -79,7 +80,7 @@ class KeyValueCompression {
// the rest
in.readFully(backingArray, pos, length - pos);
return new KeyValue(backingArray);
return new KeyValue(backingArray, 0, length);
}
private static void checkLength(int len, int max) throws IOException {
@ -105,6 +106,7 @@ class KeyValueCompression {
// we first write the KeyValue infrastructure as VInts.
WritableUtils.writeVInt(out, keyVal.getKeyLength());
WritableUtils.writeVInt(out, keyVal.getValueLength());
WritableUtils.writeVInt(out, keyVal.getTagsLength());
// now we write the row key, as the row key is likely to be repeated
// We save space only if we attempt to compress elements with duplicates

View File

@ -156,8 +156,8 @@ public class WALCellCodec implements Codec {
// We first write the KeyValue infrastructure as VInts.
StreamUtils.writeRawVInt32(out, kv.getKeyLength());
StreamUtils.writeRawVInt32(out, kv.getValueLength());
// To support tags. This will be replaced with kv.getTagsLength
StreamUtils.writeRawVInt32(out, (short)0);
// To support tags
StreamUtils.writeRawVInt32(out, kv.getTagsLength());
// Write row, qualifier, and family; use dictionary
// compression as they're likely to have duplicates.
@ -199,10 +199,13 @@ public class WALCellCodec implements Codec {
int keylength = StreamUtils.readRawVarint32(in);
int vlength = StreamUtils.readRawVarint32(in);
// To support Tags..Tags length will be 0.
// For now ignore the read value. This will be the tagslength
StreamUtils.readRawVarint32(in);
int length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength;
int tagsLength = StreamUtils.readRawVarint32(in);
int length = 0;
if (tagsLength == 0) {
length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength;
} else {
length = KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + keylength + vlength + tagsLength;
}
byte[] backingArray = new byte[length];
int pos = 0;
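A small worked example of the length computation above, assuming KEYVALUE_INFRASTRUCTURE_SIZE covers the two 4-byte length ints (8 bytes) and KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE additionally covers the 2-byte tags length (10 bytes):
// keylength = 30, vlength = 100:
//   tagsLength == 0  ->  length =  8 + 30 + 100      = 138  (no tags-length field on the wire)
//   tagsLength == 12 ->  length = 10 + 30 + 100 + 12 = 152  (2 extra bytes for the short tags length)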

View File

@ -1,97 +0,0 @@
/**
* Copyright The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.lang.ClassNotFoundException;
import java.util.zip.Checksum;
import java.lang.reflect.Constructor;
/**
* Utility class that is used to generate a Checksum object.
* The Checksum implementation is pluggable and an application
* can specify their own class that implements their own
* Checksum algorithm.
*/
public class ChecksumFactory {
static private final Class<?>[] EMPTY_ARRAY = new Class[]{};
/**
* Create a new instance of a Checksum object.
* @return The newly created Checksum object
*/
static public Checksum newInstance(String className) throws IOException {
try {
Class<?> clazz = getClassByName(className);
return (Checksum)newInstance(clazz);
} catch (ClassNotFoundException e) {
throw new IOException(e);
}
}
/**
* Returns a Constructor that can be used to create a Checksum object.
* @param className classname for which an constructor is created
* @return a new Constructor object
*/
static public Constructor<?> newConstructor(String className)
throws IOException {
try {
Class<?> clazz = getClassByName(className);
Constructor<?> ctor = clazz.getDeclaredConstructor(EMPTY_ARRAY);
ctor.setAccessible(true);
return ctor;
} catch (ClassNotFoundException e) {
throw new IOException(e);
} catch (java.lang.NoSuchMethodException e) {
throw new IOException(e);
}
}
/** Create an object for the given class and initialize it from conf
*
* @param theClass class of which an object is created
* @return a new object
*/
static private <T> T newInstance(Class<T> theClass) {
T result;
try {
Constructor<T> ctor = theClass.getDeclaredConstructor(EMPTY_ARRAY);
ctor.setAccessible(true);
result = ctor.newInstance();
} catch (Exception e) {
throw new RuntimeException(e);
}
return result;
}
/**
* Load a class by name.
* @param name the class name.
* @return the class object.
* @throws ClassNotFoundException if the class is not found.
*/
static private Class<?> getClassByName(String name)
throws ClassNotFoundException {
ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
return Class.forName(name, true, classLoader);
}
}

View File

@ -1,180 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.util.zip.Checksum;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Checksum types. The Checksum type is a one byte number
* that stores a representation of the checksum algorithm
* used to encode a hfile. The ordinal of these cannot
* change or else you risk breaking all existing HFiles out there.
*/
public enum ChecksumType {
NULL((byte)0) {
@Override
public String getName() {
return "NULL";
}
@Override
public void initialize() {
// do nothing
}
@Override
public Checksum getChecksumObject() throws IOException {
return null; // checksums not used
}
},
CRC32((byte)1) {
private volatile Constructor<?> ctor;
@Override
public String getName() {
return "CRC32";
}
@Override
public void initialize() {
final String PURECRC32 = "org.apache.hadoop.util.PureJavaCrc32";
final String JDKCRC = "java.util.zip.CRC32";
LOG = LogFactory.getLog(ChecksumType.class);
// check if hadoop library is available
try {
ctor = ChecksumFactory.newConstructor(PURECRC32);
LOG.info("Checksum using " + PURECRC32);
} catch (Exception e) {
LOG.trace(PURECRC32 + " not available.");
}
try {
// The default checksum class name is java.util.zip.CRC32.
// This is available on all JVMs.
if (ctor == null) {
ctor = ChecksumFactory.newConstructor(JDKCRC);
LOG.info("Checksum can use " + JDKCRC);
}
} catch (Exception e) {
LOG.trace(JDKCRC + " not available.");
}
}
@Override
public Checksum getChecksumObject() throws IOException {
if (ctor == null) {
throw new IOException("Bad constructor for " + getName());
}
try {
return (Checksum)ctor.newInstance();
} catch (Exception e) {
throw new IOException(e);
}
}
},
CRC32C((byte)2) {
private transient Constructor<?> ctor;
@Override
public String getName() {
return "CRC32C";
}
@Override
public void initialize() {
final String PURECRC32C = "org.apache.hadoop.util.PureJavaCrc32C";
LOG = LogFactory.getLog(ChecksumType.class);
try {
ctor = ChecksumFactory.newConstructor(PURECRC32C);
LOG.info("Checksum can use " + PURECRC32C);
} catch (Exception e) {
LOG.trace(PURECRC32C + " not available.");
}
}
@Override
public Checksum getChecksumObject() throws IOException {
if (ctor == null) {
throw new IOException("Bad constructor for " + getName());
}
try {
return (Checksum)ctor.newInstance();
} catch (Exception e) {
throw new IOException(e);
}
}
};
private final byte code;
protected Log LOG;
/** initializes the relevant checksum class object */
abstract void initialize();
/** returns the name of this checksum type */
public abstract String getName();
private ChecksumType(final byte c) {
this.code = c;
initialize();
}
/** returns a object that can be used to generate/validate checksums */
public abstract Checksum getChecksumObject() throws IOException;
public byte getCode() {
return this.code;
}
/**
* Cannot rely on enum ordinals . They change if item is removed or moved.
* Do our own codes.
* @param b
* @return Type associated with passed code.
*/
public static ChecksumType codeToType(final byte b) {
for (ChecksumType t : ChecksumType.values()) {
if (t.getCode() == b) {
return t;
}
}
throw new RuntimeException("Unknown checksum type code " + b);
}
/**
* Map a checksum name to a specific type.
* Do our own names.
* @param name
* @return Type associated with passed code.
*/
public static ChecksumType nameToType(final String name) {
for (ChecksumType t : ChecksumType.values()) {
if (t.getName().equals(name)) {
return t;
}
}
throw new RuntimeException("Unknown checksum type name " + name);
}
}

View File

@ -34,6 +34,8 @@ import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.io.compress.Compressor;
/**
@ -112,9 +114,11 @@ public class CompressionTest {
public static void doSmokeTest(FileSystem fs, Path path, String codec)
throws Exception {
Configuration conf = HBaseConfiguration.create();
HFileContext context = new HFileContext();
context.setCompressAlgo(AbstractHFileWriter.compressionByName(codec));
HFile.Writer writer = HFile.getWriterFactoryNoCache(conf)
.withPath(fs, path)
.withCompression(codec)
.withFileContext(context)
.create();
writer.append(Bytes.toBytes("testkey"), Bytes.toBytes("testval"));
writer.appendFileInfo(Bytes.toBytes("infokey"), Bytes.toBytes("infoval"));

View File

@ -178,13 +178,15 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
{ Compression.Algorithm.GZ }
});
/** This is for unit tests parameterized with a single boolean. */
/** This is for unit tests parameterized with two booleans. */
public static final List<Object[]> BOOLEAN_PARAMETERIZED =
Arrays.asList(new Object[][] {
{ new Boolean(false) },
{ new Boolean(true) }
});
/** This is for unit tests parameterized with combinations of memstore TS and tags. */
public static final List<Object[]> MEMSTORETS_TAGS_PARAMETRIZED = memStoreTSAndTagsCombination();
/** Compression algorithms to use in testing */
public static final Compression.Algorithm[] COMPRESSION_ALGORITHMS ={
Compression.Algorithm.NONE, Compression.Algorithm.GZ
@ -205,6 +207,18 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
return Collections.unmodifiableList(configurations);
}
/**
* Create combinations of memstoreTS and tags
*/
private static List<Object[]> memStoreTSAndTagsCombination() {
List<Object[]> configurations = new ArrayList<Object[]>();
configurations.add(new Object[] { false, false });
configurations.add(new Object[] { false, true });
configurations.add(new Object[] { true, false });
configurations.add(new Object[] { true, true });
return Collections.unmodifiableList(configurations);
}
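A sketch of how a parameterized test might consume these combinations; the JUnit 4 Parameterized runner is assumed and the class name is illustrative:
import java.util.Collection;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@RunWith(Parameterized.class)
public class TestSomethingWithMvccAndTags {   // illustrative name
  @Parameterized.Parameters
  public static Collection<Object[]> parameters() {
    return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
  }
  private final boolean includeMvcc;
  private final boolean includeTags;
  public TestSomethingWithMvccAndTags(boolean includeMvcc, boolean includeTags) {
    this.includeMvcc = includeMvcc;
    this.includeTags = includeTags;
  }
  // tests would then set includesMvcc/includesTags on the HFileContext they build
}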
public static final Collection<Object[]> BLOOM_AND_COMPRESSION_COMBINATIONS =
bloomAndCompressionCombinations();

View File

@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;
@ -188,10 +189,12 @@ public class HFilePerformanceEvaluation {
@Override
void setUp() throws Exception {
HFileContext hFileContext = new HFileContext();
hFileContext.setBlocksize(RFILE_BLOCKSIZE);
writer =
HFile.getWriterFactoryNoCache(conf)
.withPath(fs, mf)
.withBlockSize(RFILE_BLOCKSIZE)
.withFileContext(hFileContext)
.create();
}

View File

@ -22,26 +22,27 @@ import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.PrintStream;
import java.io.File;
import java.lang.reflect.Constructor;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.lang.reflect.Constructor;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HConnection;
@ -51,21 +52,19 @@ import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.WhileMatchFilter;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Hash;
import org.apache.hadoop.hbase.util.MurmurHash;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
@ -79,9 +78,9 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer;
import org.apache.hadoop.util.LineReader;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.util.LineReader;
/**
@ -104,9 +103,11 @@ public class PerformanceEvaluation extends Configured implements Tool {
protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName());
private static final int DEFAULT_ROW_PREFIX_LENGTH = 16;
private static final int VALUE_LENGTH = 1000;
public static final int VALUE_LENGTH = 1000;
private static final int ONE_GB = 1024 * 1024 * 1000;
private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH;
// TODO: should we make this configurable?
private static final int TAG_LENGTH = 256;
public static final byte[] COMPRESSION = Bytes.toBytes("NONE");
public static final TableName TABLE_NAME =
@ -129,6 +130,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
private boolean writeToWAL = true;
private boolean inMemoryCF = false;
private int presplitRegions = 0;
private boolean useTags = false;
private int noOfTags = 1;
private HConnection connection;
private static final Path PERF_EVAL_DIR = new Path("performance_evaluation");
@ -217,6 +220,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
private int clients = 0;
private boolean flushCommits = false;
private boolean writeToWAL = true;
private boolean useTags = false;
private int noOfTags = 0;
public PeInputSplit() {
this.startRow = 0;
@ -225,16 +230,20 @@ public class PerformanceEvaluation extends Configured implements Tool {
this.clients = 0;
this.flushCommits = false;
this.writeToWAL = true;
this.useTags = false;
this.noOfTags = 0;
}
public PeInputSplit(int startRow, int rows, int totalRows, int clients,
boolean flushCommits, boolean writeToWAL) {
boolean flushCommits, boolean writeToWAL, boolean useTags, int noOfTags) {
this.startRow = startRow;
this.rows = rows;
this.totalRows = totalRows;
this.clients = clients;
this.flushCommits = flushCommits;
this.writeToWAL = writeToWAL;
this.useTags = useTags;
this.noOfTags = noOfTags;
}
@Override
@ -245,6 +254,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
this.clients = in.readInt();
this.flushCommits = in.readBoolean();
this.writeToWAL = in.readBoolean();
this.useTags = in.readBoolean();
this.noOfTags = in.readInt();
}
@Override
@ -255,6 +266,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
out.writeInt(clients);
out.writeBoolean(flushCommits);
out.writeBoolean(writeToWAL);
out.writeBoolean(useTags);
out.writeInt(noOfTags);
}
@Override
@ -290,6 +303,14 @@ public class PerformanceEvaluation extends Configured implements Tool {
public boolean isWriteToWAL() {
return writeToWAL;
}
public boolean isUseTags() {
return useTags;
}
public int getNoOfTags() {
return noOfTags;
}
}
/**
@ -326,6 +347,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
int clients = Integer.parseInt(m.group(4));
boolean flushCommits = Boolean.parseBoolean(m.group(5));
boolean writeToWAL = Boolean.parseBoolean(m.group(6));
boolean useTags = Boolean.parseBoolean(m.group(7));
int noOfTags = Integer.parseInt(m.group(8));
LOG.debug("split["+ splitList.size() + "] " +
" startRow=" + startRow +
@ -333,11 +356,13 @@ public class PerformanceEvaluation extends Configured implements Tool {
" totalRows=" + totalRows +
" clients=" + clients +
" flushCommits=" + flushCommits +
" writeToWAL=" + writeToWAL);
" writeToWAL=" + writeToWAL +
" useTags=" + useTags +
" noOfTags=" +noOfTags);
PeInputSplit newSplit =
new PeInputSplit(startRow, rows, totalRows, clients,
flushCommits, writeToWAL);
flushCommits, writeToWAL, useTags, noOfTags);
splitList.add(newSplit);
}
}
@ -457,9 +482,10 @@ public class PerformanceEvaluation extends Configured implements Tool {
// Evaluation task
long elapsedTime = this.pe.runOneClient(this.cmd, value.getStartRow(),
value.getRows(), value.getTotalRows(),
value.isFlushCommits(), value.isWriteToWAL(),
HConnectionManager.createConnection(context.getConfiguration()), status);
value.getRows(), value.getTotalRows(),
value.isFlushCommits(), value.isWriteToWAL(),
value.isUseTags(), value.getNoOfTags(),
HConnectionManager.createConnection(context.getConfiguration()), status);
// Collect how much time the thing took. Report as map output and
// to the ELAPSED_TIME counter.
context.getCounter(Counter.ELAPSED_TIME).increment(elapsedTime);
@ -566,6 +592,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
final Compression.Algorithm compression = this.compression;
final boolean writeToWal = this.writeToWAL;
final int preSplitRegions = this.presplitRegions;
final boolean useTags = this.useTags;
final int numTags = this.noOfTags;
final HConnection connection = HConnectionManager.createConnection(getConf());
for (int i = 0; i < this.N; i++) {
final int index = i;
@ -582,14 +610,16 @@ public class PerformanceEvaluation extends Configured implements Tool {
pe.presplitRegions = preSplitRegions;
pe.N = N;
pe.connection = connection;
pe.useTags = useTags;
pe.noOfTags = numTags;
try {
long elapsedTime = pe.runOneClient(cmd, index * perClientRows,
perClientRows, R,
flushCommits, writeToWAL, connection, new Status() {
public void setStatus(final String msg) throws IOException {
LOG.info("client-" + getName() + " " + msg);
}
});
perClientRows, R,
flushCommits, writeToWAL, useTags, noOfTags, connection, new Status() {
public void setStatus(final String msg) throws IOException {
LOG.info("client-" + getName() + " " + msg);
}
});
timings[index] = elapsedTime;
LOG.info("Finished " + getName() + " in " + elapsedTime +
"ms writing " + perClientRows + " rows");
@ -748,14 +778,16 @@ public class PerformanceEvaluation extends Configured implements Tool {
private TableName tableName;
private boolean flushCommits;
private boolean writeToWAL = true;
private boolean useTags = false;
private int noOfTags = 0;
private HConnection connection;
TestOptions() {
}
TestOptions(int startRow, int perClientRunRows, int totalRows,
int numClientThreads, TableName tableName,
boolean flushCommits, boolean writeToWAL, HConnection connection) {
TestOptions(int startRow, int perClientRunRows, int totalRows, int numClientThreads,
TableName tableName, boolean flushCommits, boolean writeToWAL, boolean useTags,
int noOfTags, HConnection connection) {
this.startRow = startRow;
this.perClientRunRows = perClientRunRows;
this.totalRows = totalRows;
@ -763,6 +795,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
this.tableName = tableName;
this.flushCommits = flushCommits;
this.writeToWAL = writeToWAL;
this.useTags = useTags;
this.noOfTags = noOfTags;
this.connection = connection;
}
@ -797,6 +831,13 @@ public class PerformanceEvaluation extends Configured implements Tool {
public HConnection getConnection() {
return connection;
}
public boolean isUseTags() {
return this.useTags;
}
public int getNumTags() {
return this.noOfTags;
}
}
/*
@ -822,6 +863,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
protected volatile Configuration conf;
protected boolean flushCommits;
protected boolean writeToWAL;
protected boolean useTags;
protected int noOfTags;
protected HConnection connection;
/**
@ -839,6 +882,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
this.conf = conf;
this.flushCommits = options.isFlushCommits();
this.writeToWAL = options.isWriteToWAL();
this.useTags = options.isUseTags();
this.noOfTags = options.getNumTags();
this.connection = options.getConnection();
}
@ -1041,10 +1086,20 @@ public class PerformanceEvaluation extends Configured implements Tool {
@Override
void testRow(final int i) throws IOException {
byte [] row = getRandomRow(this.rand, this.totalRows);
byte[] row = getRandomRow(this.rand, this.totalRows);
Put put = new Put(row);
byte[] value = generateValue(this.rand);
put.add(FAMILY_NAME, QUALIFIER_NAME, value);
byte[] value = generateData(this.rand, VALUE_LENGTH);
if (useTags) {
byte[] tag = generateData(this.rand, TAG_LENGTH);
Tag[] tags = new Tag[noOfTags];
for (int n = 0; n < noOfTags; n++) {
Tag t = new Tag((byte) n, tag);
tags[n] = t;
}
put.add(FAMILY_NAME, QUALIFIER_NAME, value, tags);
} else {
put.add(FAMILY_NAME, QUALIFIER_NAME, value);
}
put.setDurability(writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
table.put(put);
}
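Stripped of the evaluation plumbing, the client-facing tag write path exercised above reduces to this sketch (the table handle type and the row/family/qualifier/tag literals are placeholders, not taken from the diff):

    import java.io.IOException;
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.client.Durability;
    import org.apache.hadoop.hbase.client.HTableInterface;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.util.Bytes;

    final class TaggedPutSketch {
      // Writes one cell carrying a single tag; mirrors the pattern in testRow above.
      static void writeTaggedCell(HTableInterface table) throws IOException {
        Put put = new Put(Bytes.toBytes("row1"));
        Tag[] tags = new Tag[] { new Tag((byte) 1, Bytes.toBytes("metaValue1")) };
        put.add(Bytes.toBytes("f"), Bytes.toBytes("q"), Bytes.toBytes("v"), tags);
        put.setDurability(Durability.SYNC_WAL);   // same durability toggle the tests use
        table.put(put);
      }
    }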
@ -1102,8 +1157,18 @@ public class PerformanceEvaluation extends Configured implements Tool {
@Override
void testRow(final int i) throws IOException {
Put put = new Put(format(i));
byte[] value = generateValue(this.rand);
put.add(FAMILY_NAME, QUALIFIER_NAME, value);
byte[] value = generateData(this.rand, VALUE_LENGTH);
if (useTags) {
byte[] tag = generateData(this.rand, TAG_LENGTH);
Tag[] tags = new Tag[noOfTags];
for (int n = 0; n < noOfTags; n++) {
Tag t = new Tag((byte) n, tag);
tags[n] = t;
}
put.add(FAMILY_NAME, QUALIFIER_NAME, value, tags);
} else {
put.add(FAMILY_NAME, QUALIFIER_NAME, value);
}
put.setDurability(writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
table.put(put);
}
@ -1119,7 +1184,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
@Override
void testRow(int i) throws IOException {
byte[] value = generateValue(this.rand);
byte[] value = generateData(this.rand, VALUE_LENGTH);
Scan scan = constructScan(value);
ResultScanner scanner = null;
try {
@ -1165,11 +1230,11 @@ public class PerformanceEvaluation extends Configured implements Tool {
* consumes about 30% of CPU time.
* @return Generated random value to insert into a table cell.
*/
public static byte[] generateValue(final Random r) {
byte [] b = new byte [VALUE_LENGTH];
public static byte[] generateData(final Random r, int length) {
byte [] b = new byte [length];
int i = 0;
for(i = 0; i < (VALUE_LENGTH-8); i += 8) {
for(i = 0; i < (length-8); i += 8) {
b[i] = (byte) (65 + r.nextInt(26));
b[i+1] = b[i];
b[i+2] = b[i];
@ -1181,7 +1246,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
}
byte a = (byte) (65 + r.nextInt(26));
for(; i < VALUE_LENGTH; i++) {
for(; i < length; i++) {
b[i] = a;
}
return b;
@ -1192,16 +1257,16 @@ public class PerformanceEvaluation extends Configured implements Tool {
}
long runOneClient(final Class<? extends Test> cmd, final int startRow,
final int perClientRunRows, final int totalRows,
boolean flushCommits, boolean writeToWAL, HConnection connection,
final Status status)
final int perClientRunRows, final int totalRows,
boolean flushCommits, boolean writeToWAL, boolean useTags, int noOfTags,
HConnection connection, final Status status)
throws IOException {
status.setStatus("Start " + cmd + " at offset " + startRow + " for " +
perClientRunRows + " rows");
long totalElapsedTime = 0;
TestOptions options = new TestOptions(startRow, perClientRunRows,
totalRows, N, tableName, flushCommits, writeToWAL, connection);
totalRows, N, tableName, flushCommits, writeToWAL, useTags, noOfTags, connection);
final Test t;
try {
Constructor<? extends Test> constructor = cmd.getDeclaredConstructor(
@ -1233,8 +1298,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
try {
admin = new HBaseAdmin(getConf());
checkTable(admin);
runOneClient(cmd, 0, this.R, this.R, this.flushCommits, this.writeToWAL, this.connection,
status);
runOneClient(cmd, 0, this.R, this.R, this.flushCommits, this.writeToWAL,
this.useTags, this.noOfTags, this.connection, status);
} catch (Exception e) {
LOG.error("Failed", e);
}
@ -1276,6 +1341,9 @@ public class PerformanceEvaluation extends Configured implements Tool {
System.err
.println(" inmemory Tries to keep the HFiles of the CF inmemory as far as possible. Not " +
"guaranteed that reads are always served from inmemory. Default: false");
System.err.println(" usetags Writes tags along with KVs. Use with HFile V3. Default : false");
System.err
.println(" numoftags Specify the no of tags that would be needed. This works only if usetags is true.");
System.err.println();
System.err.println(" Note: -D properties will be applied to the conf used. ");
System.err.println(" For example: ");
@ -1383,6 +1451,18 @@ public class PerformanceEvaluation extends Configured implements Tool {
this.connection = HConnectionManager.createConnection(getConf());
final String useTags = "--usetags=";
if (cmd.startsWith(useTags)) {
this.useTags = Boolean.parseBoolean(cmd.substring(useTags.length()));
continue;
}
final String noOfTags = "--nooftags=";
if (cmd.startsWith(noOfTags)) {
this.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length()));
continue;
}
Class<? extends Test> cmdClass = determineCommandClass(cmd);
if (cmdClass != null) {
getArgs(i + 1, args);
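For the new flags, a hypothetical argument vector the option loop above would accept looks like this (the command name and client count follow the tool's usual usage and are assumptions; only --usetags and --nooftags come from this change):

    // Hypothetical args: write sequentially with one client, tagging every KV with 2 tags.
    String[] args = { "--usetags=true", "--nooftags=2", "sequentialWrite", "1" };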

View File

@ -59,6 +59,7 @@ import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.regionserver.HRegion;
@ -577,8 +578,10 @@ public class TestRegionObserverInterface {
Configuration conf,
FileSystem fs, Path path,
byte[] family, byte[] qualifier) throws IOException {
HFileContext context = new HFileContext();
HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
.withPath(fs, path)
.withFileContext(context)
.create();
long now = System.currentTimeMillis();
try {

View File

@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.AfterClass;
@ -82,10 +83,11 @@ public class TestHalfStoreFileReader {
Configuration conf = TEST_UTIL.getConfiguration();
FileSystem fs = FileSystem.get(conf);
CacheConfig cacheConf = new CacheConfig(conf);
HFileContext meta = new HFileContext();
meta.setBlocksize(1024);
HFile.Writer w = HFile.getWriterFactory(conf, cacheConf)
.withPath(fs, p)
.withBlockSize(1024)
.withFileContext(meta)
.create();
// write some things.
@ -147,10 +149,11 @@ public class TestHalfStoreFileReader {
Configuration conf = TEST_UTIL.getConfiguration();
FileSystem fs = FileSystem.get(conf);
CacheConfig cacheConf = new CacheConfig(conf);
HFileContext meta = new HFileContext();
meta.setBlocksize(1024);
HFile.Writer w = HFile.getWriterFactory(conf, cacheConf)
.withPath(fs, p)
.withBlockSize(1024)
.withFileContext(meta)
.create();
// write some things.

View File

@ -31,9 +31,11 @@ import java.util.Random;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.LargeTests;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.test.RedundantKVGenerator;
import org.junit.Test;
@ -43,82 +45,98 @@ import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
* Test all of the data block encoding algorithms for correctness.
* Most of the class generate data which will test different branches in code.
* Test all of the data block encoding algorithms for correctness. Most of the
* methods in this class generate data that exercises different branches in the code.
*/
@Category(LargeTests.class)
@RunWith(Parameterized.class)
public class TestDataBlockEncoders {
static int NUMBER_OF_KV = 10000;
static int NUM_RANDOM_SEEKS = 10000;
private static int ENCODED_DATA_OFFSET =
HConstants.HFILEBLOCK_HEADER_SIZE + DataBlockEncoding.ID_SIZE;
private static int NUMBER_OF_KV = 10000;
private static int NUM_RANDOM_SEEKS = 10000;
private static int ENCODED_DATA_OFFSET = HConstants.HFILEBLOCK_HEADER_SIZE
+ DataBlockEncoding.ID_SIZE;
private RedundantKVGenerator generator = new RedundantKVGenerator();
private Random randomizer = new Random(42l);
private final boolean includesMemstoreTS;
private final boolean includesTags;
@Parameters
public static Collection<Object[]> parameters() {
return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
}
public TestDataBlockEncoders(boolean includesMemstoreTS) {
public TestDataBlockEncoders(boolean includesMemstoreTS, boolean includesTag) {
this.includesMemstoreTS = includesMemstoreTS;
this.includesTags = includesTag;
}
private HFileBlockEncodingContext getEncodingContext(
Compression.Algorithm algo, DataBlockEncoding encoding) {
private HFileBlockEncodingContext getEncodingContext(Compression.Algorithm algo,
DataBlockEncoding encoding) {
DataBlockEncoder encoder = encoding.getEncoder();
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTags);
meta.setCompressAlgo(algo);
if (encoder != null) {
return encoder.newDataBlockEncodingContext(algo, encoding,
HConstants.HFILEBLOCK_DUMMY_HEADER);
return encoder.newDataBlockEncodingContext(encoding,
HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
} else {
return new HFileBlockDefaultEncodingContext(algo, encoding, HConstants.HFILEBLOCK_DUMMY_HEADER);
return new HFileBlockDefaultEncodingContext(encoding,
HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
}
}
private byte[] encodeBytes(DataBlockEncoding encoding,
ByteBuffer dataset) throws IOException {
private byte[] encodeBytes(DataBlockEncoding encoding, ByteBuffer dataset)
throws IOException {
DataBlockEncoder encoder = encoding.getEncoder();
HFileBlockEncodingContext encodingCtx =
getEncodingContext(Compression.Algorithm.NONE, encoding);
HFileBlockEncodingContext encodingCtx = getEncodingContext(Compression.Algorithm.NONE,
encoding);
encoder.encodeKeyValues(dataset, includesMemstoreTS,
encodingCtx);
encoder.encodeKeyValues(dataset, encodingCtx);
byte[] encodedBytesWithHeader =
encodingCtx.getUncompressedBytesWithHeader();
byte[] encodedData =
new byte[encodedBytesWithHeader.length - ENCODED_DATA_OFFSET];
System.arraycopy(encodedBytesWithHeader, ENCODED_DATA_OFFSET, encodedData,
0, encodedData.length);
byte[] encodedBytesWithHeader = encodingCtx.getUncompressedBytesWithHeader();
byte[] encodedData = new byte[encodedBytesWithHeader.length - ENCODED_DATA_OFFSET];
System.arraycopy(encodedBytesWithHeader, ENCODED_DATA_OFFSET, encodedData, 0,
encodedData.length);
return encodedData;
}
private void testAlgorithm(ByteBuffer dataset, DataBlockEncoding encoding)
throws IOException {
private void testAlgorithm(ByteBuffer dataset, DataBlockEncoding encoding,
List<KeyValue> kvList) throws IOException {
// encode
byte[] encodedBytes = encodeBytes(encoding, dataset);
//decode
// decode
ByteArrayInputStream bais = new ByteArrayInputStream(encodedBytes);
DataInputStream dis = new DataInputStream(bais);
ByteBuffer actualDataset;
DataBlockEncoder encoder = encoding.getEncoder();
actualDataset = encoder.decodeKeyValues(dis, includesMemstoreTS);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTags);
meta.setCompressAlgo(Compression.Algorithm.NONE);
actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta));
dataset.rewind();
actualDataset.rewind();
// In the prefix tree case the decoded stream will not carry the mvcc, which is
// why the check below is commented out.
// if (encoding != DataBlockEncoding.PREFIX_TREE) {
assertEquals("Encoding -> decoding gives different results for " + encoder,
Bytes.toStringBinary(dataset), Bytes.toStringBinary(actualDataset));
// }
}
/**
* Test data block encoding of empty KeyValue.
* @throws IOException On test failure.
*
* @throws IOException
* On test failure.
*/
@Test
public void testEmptyKeyValues() throws IOException {
@ -127,15 +145,26 @@ public class TestDataBlockEncoders {
byte[] family = new byte[0];
byte[] qualifier = new byte[0];
byte[] value = new byte[0];
kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value));
testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList,
includesMemstoreTS));
if (!includesTags) {
kvList.add(new KeyValue(row, family, qualifier, 0l, value));
kvList.add(new KeyValue(row, family, qualifier, 0l, value));
} else {
byte[] metaValue1 = Bytes.toBytes("metaValue1");
byte[] metaValue2 = Bytes.toBytes("metaValue2");
kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
metaValue1) }));
kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
metaValue2) }));
}
testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
kvList);
}
/**
* Test KeyValues with negative timestamp.
* @throws IOException On test failure.
*
* @throws IOException
* On test failure.
*/
@Test
public void testNegativeTimestamps() throws IOException {
@ -144,13 +173,22 @@ public class TestDataBlockEncoders {
byte[] family = new byte[0];
byte[] qualifier = new byte[0];
byte[] value = new byte[0];
kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
testEncodersOnDataset(
RedundantKVGenerator.convertKvToByteBuffer(kvList,
includesMemstoreTS));
if (includesTags) {
byte[] metaValue1 = Bytes.toBytes("metaValue1");
byte[] metaValue2 = Bytes.toBytes("metaValue2");
kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
metaValue1) }));
kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1,
metaValue2) }));
} else {
kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value));
kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value));
}
testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
kvList);
}
/**
* Test whether compression -> decompression gives the consistent results on
* pseudorandom sample.
@ -158,41 +196,42 @@ public class TestDataBlockEncoders {
*/
@Test
public void testExecutionOnSample() throws IOException {
testEncodersOnDataset(
RedundantKVGenerator.convertKvToByteBuffer(
generator.generateTestKeyValues(NUMBER_OF_KV),
includesMemstoreTS));
List<KeyValue> kvList = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS),
kvList);
}
/**
* Test seeking while file is encoded.
*/
@Test
public void testSeekingOnSample() throws IOException{
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
ByteBuffer originalBuffer =
RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
includesMemstoreTS);
public void testSeekingOnSample() throws IOException {
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
includesMemstoreTS);
// create all seekers
List<DataBlockEncoder.EncodedSeeker> encodedSeekers =
new ArrayList<DataBlockEncoder.EncodedSeeker>();
List<DataBlockEncoder.EncodedSeeker> encodedSeekers = new ArrayList<DataBlockEncoder.EncodedSeeker>();
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
if (encoding.getEncoder() == null) {
continue;
}
ByteBuffer encodedBuffer =
ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
ByteBuffer encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
DataBlockEncoder encoder = encoding.getEncoder();
DataBlockEncoder.EncodedSeeker seeker =
encoder.createSeeker(KeyValue.COMPARATOR, includesMemstoreTS);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTags);
meta.setCompressAlgo(Compression.Algorithm.NONE);
DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
seeker.setCurrentBuffer(encodedBuffer);
encodedSeekers.add(seeker);
}
// test it!
// try a few random seeks
for (boolean seekBefore : new boolean[] {false, true}) {
for (boolean seekBefore : new boolean[] { false, true }) {
for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) {
int keyValueId;
if (!seekBefore) {
@ -208,46 +247,46 @@ public class TestDataBlockEncoders {
// check edge cases
checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0));
for (boolean seekBefore : new boolean[] {false, true}) {
checkSeekingConsistency(encodedSeekers, seekBefore,
sampleKv.get(sampleKv.size() - 1));
for (boolean seekBefore : new boolean[] { false, true }) {
checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1));
KeyValue midKv = sampleKv.get(sampleKv.size() / 2);
KeyValue lastMidKv = midKv.createLastOnRowCol();
checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv);
}
}
/**
* Test iterating on encoded buffers.
*/
@Test
public void testNextOnSample() {
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
ByteBuffer originalBuffer =
RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
includesMemstoreTS);
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
includesMemstoreTS);
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
if (encoding.getEncoder() == null) {
continue;
}
DataBlockEncoder encoder = encoding.getEncoder();
ByteBuffer encodedBuffer = null;
try {
encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
} catch (IOException e) {
throw new RuntimeException(String.format(
"Bug while encoding using '%s'", encoder.toString()), e);
throw new RuntimeException(String.format("Bug while encoding using '%s'",
encoder.toString()), e);
}
DataBlockEncoder.EncodedSeeker seeker =
encoder.createSeeker(KeyValue.COMPARATOR, includesMemstoreTS);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTags);
meta.setCompressAlgo(Compression.Algorithm.NONE);
DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
seeker.setCurrentBuffer(encodedBuffer);
int i = 0;
do {
KeyValue expectedKeyValue = sampleKv.get(i);
ByteBuffer keyValue = seeker.getKeyValueBuffer();
if (0 != Bytes.compareTo(
keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
if (0 != Bytes.compareTo(keyValue.array(), keyValue.arrayOffset(), keyValue.limit(),
expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(),
expectedKeyValue.getLength())) {
@ -257,19 +296,16 @@ public class TestDataBlockEncoders {
int leftOff = keyValue.arrayOffset();
int rightOff = expectedKeyValue.getOffset();
int length = Math.min(keyValue.limit(), expectedKeyValue.getLength());
while (commonPrefix < length &&
left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
while (commonPrefix < length
&& left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) {
commonPrefix++;
}
fail(String.format(
"next() produces wrong results " +
"encoder: %s i: %d commonPrefix: %d" +
"\n expected %s\n actual %s",
encoder.toString(), i, commonPrefix,
Bytes.toStringBinary(expectedKeyValue.getBuffer(),
expectedKeyValue.getOffset(), expectedKeyValue.getLength()),
Bytes.toStringBinary(keyValue)));
fail(String.format("next() produces wrong results "
+ "encoder: %s i: %d commonPrefix: %d" + "\n expected %s\n actual %s", encoder
.toString(), i, commonPrefix, Bytes.toStringBinary(expectedKeyValue.getBuffer(),
expectedKeyValue.getOffset(), expectedKeyValue.getLength()), Bytes
.toStringBinary(keyValue)));
}
i++;
} while (seeker.next());
@ -281,10 +317,9 @@ public class TestDataBlockEncoders {
*/
@Test
public void testFirstKeyInBlockOnSample() {
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV);
ByteBuffer originalBuffer =
RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
includesMemstoreTS);
List<KeyValue> sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags);
ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv,
includesMemstoreTS);
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
if (encoding.getEncoder() == null) {
@ -295,39 +330,35 @@ public class TestDataBlockEncoders {
try {
encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer));
} catch (IOException e) {
throw new RuntimeException(String.format(
"Bug while encoding using '%s'", encoder.toString()), e);
throw new RuntimeException(String.format("Bug while encoding using '%s'",
encoder.toString()), e);
}
ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer);
KeyValue firstKv = sampleKv.get(0);
if (0 != Bytes.compareTo(
keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
firstKv.getBuffer(), firstKv.getKeyOffset(),
firstKv.getKeyLength())) {
if (0 != Bytes.compareTo(keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(),
firstKv.getBuffer(), firstKv.getKeyOffset(), firstKv.getKeyLength())) {
int commonPrefix = 0;
int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength());
while (commonPrefix < length &&
keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] ==
firstKv.getBuffer()[firstKv.getKeyOffset() + commonPrefix]) {
while (commonPrefix < length
&& keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] == firstKv.getBuffer()[firstKv
.getKeyOffset() + commonPrefix]) {
commonPrefix++;
}
fail(String.format("Bug in '%s' commonPrefix %d",
encoder.toString(), commonPrefix));
fail(String.format("Bug in '%s' commonPrefix %d", encoder.toString(), commonPrefix));
}
}
}
private void checkSeekingConsistency(
List<DataBlockEncoder.EncodedSeeker> encodedSeekers, boolean seekBefore,
KeyValue keyValue) {
private void checkSeekingConsistency(List<DataBlockEncoder.EncodedSeeker> encodedSeekers,
boolean seekBefore, KeyValue keyValue) {
ByteBuffer expectedKeyValue = null;
ByteBuffer expectedKey = null;
ByteBuffer expectedValue = null;
for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) {
seeker.seekToKeyInBlock(keyValue.getBuffer(),
keyValue.getKeyOffset(), keyValue.getKeyLength(), seekBefore);
seeker.seekToKeyInBlock(keyValue.getBuffer(), keyValue.getKeyOffset(),
keyValue.getKeyLength(), seekBefore);
seeker.rewind();
ByteBuffer actualKeyValue = seeker.getKeyValueBuffer();
@ -353,9 +384,8 @@ public class TestDataBlockEncoders {
}
}
}
private void testEncodersOnDataset(ByteBuffer onDataset)
throws IOException{
private void testEncodersOnDataset(ByteBuffer onDataset, List<KeyValue> kvList) throws IOException {
ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity());
onDataset.rewind();
dataset.put(onDataset);
@ -366,11 +396,13 @@ public class TestDataBlockEncoders {
if (encoding.getEncoder() == null) {
continue;
}
testAlgorithm(dataset, encoding);
testAlgorithm(dataset, encoding, kvList);
// ensure that dataset is unchanged
dataset.rewind();
assertEquals("Input of two methods is changed", onDataset, dataset);
}
}
}
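The HFileContext wiring repeated throughout this test condenses to the sketch below (only setters that appear in this change are used; the helper class is illustrative). The resulting context is then handed to encoder.newDataBlockEncodingContext(...) and encoder.newDataBlockDecodingContext(meta) as shown above.

    import org.apache.hadoop.hbase.io.compress.Compression;
    import org.apache.hadoop.hbase.io.hfile.HFileContext;

    final class EncodingTestContexts {                // illustrative helper, not from the diff
      // Per-block metadata the encoders now take instead of loose boolean flags.
      static HFileContext newContext(boolean includesMemstoreTS, boolean includesTags) {
        HFileContext meta = new HFileContext();
        meta.setUsesHBaseChecksum(false);             // test blocks carry no HBase checksums
        meta.setIncludesMvcc(includesMemstoreTS);
        meta.setIncludesTags(includesTags);
        meta.setCompressAlgo(Compression.Algorithm.NONE);
        return meta;
      }
    }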

View File

@ -29,10 +29,12 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.LruBlockCache;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
@ -68,6 +70,7 @@ public class TestEncodedSeekers {
private final HBaseTestingUtility testUtil = HBaseTestingUtility.createLocalHTU();
private final DataBlockEncoding encoding;
private final boolean encodeOnDisk;
private final boolean includeTags;
/** Enable when debugging */
private static final boolean VERBOSE = false;
@ -76,21 +79,27 @@ public class TestEncodedSeekers {
public static Collection<Object[]> parameters() {
List<Object[]> paramList = new ArrayList<Object[]>();
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
for (boolean encodeOnDisk : new boolean[]{false, true}) {
paramList.add(new Object[] { encoding, encodeOnDisk });
for (boolean includeTags : new boolean[] { false, true }) {
for (boolean encodeOnDisk : new boolean[] { false, true }) {
paramList.add(new Object[] { encoding, encodeOnDisk, includeTags });
}
}
}
return paramList;
}
public TestEncodedSeekers(DataBlockEncoding encoding, boolean encodeOnDisk) {
public TestEncodedSeekers(DataBlockEncoding encoding, boolean encodeOnDisk, boolean includeTags) {
this.encoding = encoding;
this.encodeOnDisk = encodeOnDisk;
this.includeTags = includeTags;
}
@Test
public void testEncodedSeeker() throws IOException {
System.err.println("Testing encoded seekers for encoding " + encoding);
if(includeTags) {
testUtil.getConfiguration().setInt(HFile.FORMAT_VERSION_KEY, 3);
}
LruBlockCache cache =
(LruBlockCache)new CacheConfig(testUtil.getConfiguration()).getBlockCache();
cache.clearCache();
@ -134,6 +143,11 @@ public class TestEncodedSeekers {
byte[] col = Bytes.toBytes(String.valueOf(j));
byte[] value = dataGenerator.generateRandomSizeValue(key, col);
put.add(CF_BYTES, col, value);
if(includeTags) {
Tag[] tag = new Tag[1];
tag[0] = new Tag((byte)1, "Visibility");
put.add(CF_BYTES, col, value, tag);
}
if(VERBOSE){
KeyValue kvPut = new KeyValue(key, CF_BYTES, col, value);
System.err.println(Strings.padFront(i+"", ' ', 4)+" "+kvPut);

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.io.encoding;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;
@ -27,6 +28,7 @@ import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import java.util.concurrent.ConcurrentSkipListSet;
@ -35,24 +37,30 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CollectionBackedScanner;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
/**
* Tests scanning/seeking data with PrefixTree Encoding.
*/
@RunWith(Parameterized.class)
@Category(SmallTests.class)
public class TestPrefixTreeEncoding {
private static final Log LOG = LogFactory
.getLog(TestPrefixTreeEncoding.class);
static final String CF = "EncodingTestCF";
static final byte[] CF_BYTES = Bytes.toBytes(CF);
private static final Log LOG = LogFactory.getLog(TestPrefixTreeEncoding.class);
private static final String CF = "EncodingTestCF";
private static final byte[] CF_BYTES = Bytes.toBytes(CF);
private static final int NUM_ROWS_PER_BATCH = 50;
private static final int NUM_COLS_PER_ROW = 20;
@ -61,7 +69,21 @@ public class TestPrefixTreeEncoding {
KeyValue.COMPARATOR);
private static boolean formatRowNum = false;
@Parameters
public static Collection<Object[]> parameters() {
List<Object[]> paramList = new ArrayList<Object[]>();
{
paramList.add(new Object[] { false });
paramList.add(new Object[] { true });
}
return paramList;
}
private final boolean includesTag;
public TestPrefixTreeEncoding(boolean includesTag) {
this.includesTag = includesTag;
}
@Before
public void setUp() throws Exception {
kvset.clear();
@ -73,63 +95,74 @@ public class TestPrefixTreeEncoding {
formatRowNum = true;
PrefixTreeCodec encoder = new PrefixTreeCodec();
int batchId = numBatchesWritten++;
ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, false);
ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, false, includesTag);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(false);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(Algorithm.NONE);
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, false);
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
encoder.encodeKeyValues(dataBuffer, blkEncodingCtx);
EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
DataBlockEncoding.ID_SIZE, onDiskBytes.length
- DataBlockEncoding.ID_SIZE);
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
seeker.setCurrentBuffer(readBuffer);
// Seek before the first keyvalue;
KeyValue seekKey = KeyValue.createFirstDeleteFamilyOnRow(
getRowKey(batchId, 0), CF_BYTES);
seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
seekKey.getKeyLength(), true);
KeyValue seekKey = KeyValue.createFirstDeleteFamilyOnRow(getRowKey(batchId, 0), CF_BYTES);
seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), seekKey.getKeyLength(),
true);
assertEquals(null, seeker.getKeyValue());
// Seek before the middle keyvalue;
seekKey = KeyValue.createFirstDeleteFamilyOnRow(
getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), CF_BYTES);
seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
seekKey.getKeyLength(), true);
seekKey = KeyValue.createFirstDeleteFamilyOnRow(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3),
CF_BYTES);
seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), seekKey.getKeyLength(),
true);
assertNotNull(seeker.getKeyValue());
assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1), seeker
.getKeyValue().getRow());
assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1), seeker.getKeyValue().getRow());
// Seek before the last keyvalue;
seekKey = KeyValue.createFirstDeleteFamilyOnRow(Bytes.toBytes("zzzz"),
CF_BYTES);
seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(),
seekKey.getKeyLength(), true);
seekKey = KeyValue.createFirstDeleteFamilyOnRow(Bytes.toBytes("zzzz"), CF_BYTES);
seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), seekKey.getKeyLength(),
true);
assertNotNull(seeker.getKeyValue());
assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1), seeker
.getKeyValue().getRow());
assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1), seeker.getKeyValue().getRow());
}
@Test
public void testScanWithRandomData() throws Exception {
PrefixTreeCodec encoder = new PrefixTreeCodec();
ByteBuffer dataBuffer = generateRandomTestData(kvset, numBatchesWritten++);
ByteBuffer dataBuffer = generateRandomTestData(kvset, numBatchesWritten++, includesTag);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(false);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(Algorithm.NONE);
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, false);
byte[] onDiskBytes=blkEncodingCtx.getOnDiskBytesWithHeader();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
DataBlockEncoding.ID_SIZE, onDiskBytes.length
- DataBlockEncoding.ID_SIZE);
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
encoder.encodeKeyValues(dataBuffer, blkEncodingCtx);
EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
seeker.setCurrentBuffer(readBuffer);
KeyValue previousKV = null;
do{
do {
KeyValue currentKV = seeker.getKeyValue();
System.out.println(currentKV);
if (previousKV != null && KeyValue.COMPARATOR.compare(currentKV, previousKV) < 0) {
dumpInputKVSet();
fail("Current kv " + currentKV + " is smaller than previous keyvalue "
+ previousKV);
fail("Current kv " + currentKV + " is smaller than previous keyvalue " + previousKV);
}
if (!includesTag) {
assertFalse(currentKV.getTagsLength() > 0);
} else {
Assert.assertTrue(currentKV.getTagsLength() > 0);
}
previousKV = currentKV;
} while (seeker.next());
@ -139,15 +172,20 @@ public class TestPrefixTreeEncoding {
public void testSeekWithRandomData() throws Exception {
PrefixTreeCodec encoder = new PrefixTreeCodec();
int batchId = numBatchesWritten++;
ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId);
ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId, includesTag);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(false);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(Algorithm.NONE);
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, false);
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
encoder.encodeKeyValues(dataBuffer, blkEncodingCtx);
EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
DataBlockEncoding.ID_SIZE, onDiskBytes.length
- DataBlockEncoding.ID_SIZE);
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
verifySeeking(seeker, readBuffer, batchId);
}
@ -155,19 +193,23 @@ public class TestPrefixTreeEncoding {
public void testSeekWithFixedData() throws Exception {
PrefixTreeCodec encoder = new PrefixTreeCodec();
int batchId = numBatchesWritten++;
ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId);
ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, includesTag);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(false);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(Algorithm.NONE);
HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext(
Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]);
encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx);
DataBlockEncoding.PREFIX_TREE, new byte[0], meta);
encoder.encodeKeyValues(dataBuffer, blkEncodingCtx);
EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR,
false);
encoder.newDataBlockDecodingContext(meta));
byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader();
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes,
DataBlockEncoding.ID_SIZE, onDiskBytes.length
- DataBlockEncoding.ID_SIZE);
ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE,
onDiskBytes.length - DataBlockEncoding.ID_SIZE);
verifySeeking(seeker, readBuffer, batchId);
}
private void verifySeeking(EncodedSeeker encodeSeeker,
ByteBuffer encodedData, int batchId) {
List<KeyValue> kvList = new ArrayList<KeyValue>();
@ -202,73 +244,93 @@ public class TestPrefixTreeEncoding {
System.out.println(kv);
}
}
private static ByteBuffer generateFixedTestData(
ConcurrentSkipListSet<KeyValue> kvset, int batchId) throws Exception {
return generateFixedTestData(kvset, batchId, true);
private static ByteBuffer generateFixedTestData(ConcurrentSkipListSet<KeyValue> kvset,
int batchId, boolean useTags) throws Exception {
return generateFixedTestData(kvset, batchId, true, useTags);
}
private static ByteBuffer generateFixedTestData(
ConcurrentSkipListSet<KeyValue> kvset, int batchId, boolean partial)
throws Exception {
private static ByteBuffer generateFixedTestData(ConcurrentSkipListSet<KeyValue> kvset,
int batchId, boolean partial, boolean useTags) throws Exception {
ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
if (partial && i / 10 % 2 == 1) continue;
if (partial && i / 10 % 2 == 1)
continue;
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES,
getQualifier(j), getValue(batchId, i, j));
kvset.add(kv);
if (!useTags) {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue(
batchId, i, j));
kvset.add(kv);
} else {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l,
getValue(batchId, i, j), new Tag[] { new Tag((byte) 1, "metaValue1") });
kvset.add(kv);
}
}
}
for (KeyValue kv : kvset) {
userDataStream.writeInt(kv.getKeyLength());
userDataStream.writeInt(kv.getValueLength());
userDataStream
.write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
userDataStream.write(kv.getBuffer(), kv.getValueOffset(),
kv.getValueLength());
userDataStream.write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
userDataStream.write(kv.getBuffer(), kv.getValueOffset(), kv.getValueLength());
if (useTags) {
userDataStream.writeShort(kv.getTagsLength());
userDataStream.write(kv.getBuffer(), kv.getValueOffset() + kv.getValueLength()
+ Bytes.SIZEOF_SHORT, kv.getTagsLength());
}
}
return ByteBuffer.wrap(baosInMemory.toByteArray());
}
private static ByteBuffer generateRandomTestData(
ConcurrentSkipListSet<KeyValue> kvset, int batchId) throws Exception {
private static ByteBuffer generateRandomTestData(ConcurrentSkipListSet<KeyValue> kvset,
int batchId, boolean useTags) throws Exception {
ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream();
DataOutputStream userDataStream = new DataOutputStream(baosInMemory);
Random random = new Random();
for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) {
if (random.nextInt(100) < 50) continue;
if (random.nextInt(100) < 50)
continue;
for (int j = 0; j < NUM_COLS_PER_ROW; ++j) {
if (random.nextInt(100) < 50) continue;
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES,
getQualifier(j), getValue(batchId, i, j));
kvset.add(kv);
if (random.nextInt(100) < 50)
continue;
if (!useTags) {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue(
batchId, i, j));
kvset.add(kv);
} else {
KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l,
getValue(batchId, i, j), new Tag[] { new Tag((byte) 1, "metaValue1") });
kvset.add(kv);
}
}
}
for (KeyValue kv : kvset) {
userDataStream.writeInt(kv.getKeyLength());
userDataStream.writeInt(kv.getValueLength());
userDataStream
.write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
userDataStream.write(kv.getBuffer(), kv.getValueOffset(),
kv.getValueLength());
userDataStream.write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
userDataStream.write(kv.getBuffer(), kv.getValueOffset(), kv.getValueLength());
if (useTags) {
userDataStream.writeShort(kv.getTagsLength());
userDataStream.write(kv.getBuffer(), kv.getValueOffset() + kv.getValueLength()
+ Bytes.SIZEOF_SHORT, kv.getTagsLength());
}
}
return ByteBuffer.wrap(baosInMemory.toByteArray());
}
private static byte[] getRowKey(int batchId, int i) {
return Bytes.toBytes("batch" + batchId + "_row"
+ (formatRowNum ? String.format("%04d", i) : i));
return Bytes
.toBytes("batch" + batchId + "_row" + (formatRowNum ? String.format("%04d", i) : i));
}
private static byte[] getQualifier(int j) {
return Bytes.toBytes("col" + j);
return Bytes.toBytes("colfdfafhfhsdfhsdfh" + j);
}
private static byte[] getValue(int batchId, int i, int j) {
return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i))
+ "_col" + j);
return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) + "_col" + j);
}
}

View File

@ -37,6 +37,8 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MultithreadedTestUtil;
import org.apache.hadoop.hbase.MultithreadedTestUtil.TestThread;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.util.ChecksumType;
@ -339,13 +341,18 @@ public class CacheTestUtils {
cachedBuffer.putInt(uncompressedSizeWithoutHeader);
cachedBuffer.putLong(prevBlockOffset);
cachedBuffer.rewind();
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(false);
meta.setCompressAlgo(Compression.Algorithm.NONE);
meta.setBytesPerChecksum(0);
meta.setChecksumType(ChecksumType.NULL);
HFileBlock generated = new HFileBlock(BlockType.DATA,
onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader,
prevBlockOffset, cachedBuffer, HFileBlock.DONT_FILL_HEADER,
blockSize, includesMemstoreTS, HFileBlock.MINOR_VERSION_NO_CHECKSUM,
0, ChecksumType.NULL.getCode(),
onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE);
blockSize,
onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, meta);
String strKey;
/* No conflicting keys */

View File

@ -0,0 +1,31 @@
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
/**
* Used in testcases only.
*/
public enum TagUsage {
// No tags would be added
NO_TAG,
// KVs with tags
ONLY_TAG,
// KVs with and without tags
PARTIAL_TAG;
}
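One way a test data generator might branch on this enum (the helper and its randomness are hypothetical; the KeyValue and Tag construction mirrors the other tests in this change):

    import java.util.Random;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.util.Bytes;

    final class TagUsageSketch {                      // hypothetical helper
      private static final Random RNG = new Random(42L);

      static KeyValue makeKv(byte[] row, byte[] cf, byte[] q, byte[] v, TagUsage usage) {
        // ONLY_TAG always tags; PARTIAL_TAG tags roughly half the KVs; NO_TAG never does.
        boolean tagIt = usage == TagUsage.ONLY_TAG
            || (usage == TagUsage.PARTIAL_TAG && RNG.nextBoolean());
        if (tagIt) {
          Tag[] tags = new Tag[] { new Tag((byte) 1, Bytes.toBytes("metaValue1")) };
          return new KeyValue(row, cf, q, 0L, v, tags);
        }
        return new KeyValue(row, cf, q, 0L, v);
      }
    }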

View File

@ -40,13 +40,14 @@ import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
@ -183,6 +184,7 @@ public class TestCacheOnWrite {
@Before
public void setUp() throws IOException {
conf = TEST_UTIL.getConfiguration();
this.conf.set("dfs.datanode.data.dir.perm", "700");
conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION);
conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE);
conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,
@ -207,13 +209,24 @@ public class TestCacheOnWrite {
@Test
public void testStoreFileCacheOnWrite() throws IOException {
writeStoreFile();
readStoreFile();
testStoreFileCacheOnWriteInternals(false);
testStoreFileCacheOnWriteInternals(true);
}
private void readStoreFile() throws IOException {
HFileReaderV2 reader = (HFileReaderV2) HFile.createReaderWithEncoding(fs,
storeFilePath, cacheConf, encoder.getEncodingInCache());
protected void testStoreFileCacheOnWriteInternals(boolean useTags) throws IOException {
writeStoreFile(useTags);
readStoreFile(useTags);
}
private void readStoreFile(boolean useTags) throws IOException {
AbstractHFileReader reader;
if (useTags) {
reader = (HFileReaderV3) HFile.createReaderWithEncoding(fs, storeFilePath, cacheConf,
encoder.getEncodingInCache());
} else {
reader = (HFileReaderV2) HFile.createReaderWithEncoding(fs, storeFilePath, cacheConf,
encoder.getEncodingInCache());
}
LOG.info("HFile information: " + reader);
final boolean cacheBlocks = false;
final boolean pread = false;
@ -260,10 +273,13 @@ public class TestCacheOnWrite {
String countByType = blockCountByType.toString();
BlockType cachedDataBlockType =
encoderType.encodeInCache ? BlockType.ENCODED_DATA : BlockType.DATA;
assertEquals("{" + cachedDataBlockType
+ "=1379, LEAF_INDEX=154, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=18}",
countByType);
if (useTags) {
assertEquals("{" + cachedDataBlockType
+ "=1550, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=20}", countByType);
} else {
assertEquals("{" + cachedDataBlockType
+ "=1379, LEAF_INDEX=154, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=18}", countByType);
}
reader.close();
}
@ -283,33 +299,54 @@ public class TestCacheOnWrite {
}
}
public void writeStoreFile() throws IOException {
public void writeStoreFile(boolean useTags) throws IOException {
if(useTags) {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
} else {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2);
}
Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(),
"test_cache_on_write");
StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs,
DATA_BLOCK_SIZE)
.withOutputDir(storeFileParentDir)
.withCompression(compress)
.withDataBlockEncoder(encoder)
.withComparator(KeyValue.COMPARATOR)
.withBloomType(BLOOM_TYPE)
.withMaxKeyCount(NUM_KV)
.withChecksumType(CKTYPE)
.withBytesPerChecksum(CKBYTES)
.build();
HFileContext meta = new HFileContext();
meta.setCompressAlgo(compress);
meta.setChecksumType(CKTYPE);
meta.setBytesPerChecksum(CKBYTES);
meta.setBlocksize(DATA_BLOCK_SIZE);
meta.setEncodingInCache(encoder.getEncodingInCache());
meta.setEncodingOnDisk(encoder.getEncodingOnDisk());
StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs)
.withOutputDir(storeFileParentDir).withComparator(KeyValue.COMPARATOR)
.withFileContext(meta)
.withBloomType(BLOOM_TYPE).withMaxKeyCount(NUM_KV).build();
final int rowLen = 32;
for (int i = 0; i < NUM_KV; ++i) {
byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i);
byte[] v = TestHFileWriterV2.randomValue(rand);
int cfLen = rand.nextInt(k.length - rowLen + 1);
KeyValue kv = new KeyValue(
KeyValue kv;
if(useTags) {
Tag t = new Tag((byte) 1, "visibility");
List<Tag> tagList = new ArrayList<Tag>();
tagList.add(t);
Tag[] tags = new Tag[1];
tags[0] = t;
kv = new KeyValue(
k, 0, rowLen,
k, rowLen, cfLen,
k, rowLen + cfLen, k.length - rowLen - cfLen,
rand.nextLong(),
generateKeyType(rand),
v, 0, v.length, tagList);
} else {
kv = new KeyValue(
k, 0, rowLen,
k, rowLen, cfLen,
k, rowLen + cfLen, k.length - rowLen - cfLen,
rand.nextLong(),
generateKeyType(rand),
v, 0, v.length);
}
sfw.append(kv);
}
@ -319,6 +356,16 @@ public class TestCacheOnWrite {
@Test
public void testNotCachingDataBlocksDuringCompaction() throws IOException {
testNotCachingDataBlocksDuringCompactionInternals(false);
testNotCachingDataBlocksDuringCompactionInternals(true);
}
protected void testNotCachingDataBlocksDuringCompactionInternals(boolean useTags) throws IOException {
if (useTags) {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
} else {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2);
}
// TODO: need to change this test if we add a cache size threshold for
// compactions, or if we implement some other kind of intelligent logic for
// deciding what blocks to cache-on-write on compaction.
@ -347,8 +394,14 @@ public class TestCacheOnWrite {
String qualStr = "col" + iCol;
String valueStr = "value_" + rowStr + "_" + qualStr;
for (int iTS = 0; iTS < 5; ++iTS) {
p.add(cfBytes, Bytes.toBytes(qualStr), ts++,
Bytes.toBytes(valueStr));
if (useTags) {
Tag t = new Tag((byte) 1, "visibility");
Tag[] tags = new Tag[1];
tags[0] = t;
p.add(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr), tags);
} else {
p.add(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr));
}
}
}
region.put(p);
@ -369,6 +422,5 @@ public class TestCacheOnWrite {
region.close();
blockCache.shutdown();
}
}
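As a usage note, a minimal sketch (assumed client code, not taken from the patch) of attaching a tag to a cell through the Put.add(family, qualifier, ts, value, Tag[]) overload exercised in the test above; the row, family, qualifier, timestamp and tag payload are placeholders:
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.client.Put;
    import org.apache.hadoop.hbase.util.Bytes;
    // Build a Put whose single cell carries one tag; the tag type byte (1) and the
    // "visibility" payload are arbitrary illustrative values.
    Put p = new Put(Bytes.toBytes("row1"));
    Tag[] tags = new Tag[] { new Tag((byte) 1, "visibility") };
    p.add(Bytes.toBytes("cf"), Bytes.toBytes("col"), 1L, Bytes.toBytes("value"), tags);
    // The Put is then written as usual, e.g. region.put(p) as in the test above.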

View File

@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.util.ChecksumType;
@ -79,6 +80,11 @@ public class TestChecksum {
*/
@Test
public void testChecksumCorruption() throws IOException {
testChecksumCorruptionInternals(false);
testChecksumCorruptionInternals(true);
}
protected void testChecksumCorruptionInternals(boolean useTags) throws IOException {
for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
for (boolean pread : new boolean[] { false, true }) {
LOG.info("testChecksumCorruption: Compression algorithm: " + algo +
@ -86,9 +92,13 @@ public class TestChecksum {
Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_"
+ algo);
FSDataOutputStream os = fs.create(path);
HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null,
true, HFile.DEFAULT_CHECKSUM_TYPE,
HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileContext meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(true);
meta.setIncludesTags(useTags);
meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
long totalSize = 0;
for (int blockId = 0; blockId < 2; ++blockId) {
DataOutputStream dos = hbw.startWriting(BlockType.DATA);
@ -104,8 +114,12 @@ public class TestChecksum {
// Do a read that purposely introduces checksum verification failures.
FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path);
HFileBlock.FSReader hbr = new FSReaderV2Test(is, algo,
totalSize, HFile.MAX_FORMAT_VERSION, fs, path);
meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(true);
meta.setIncludesTags(useTags);
meta.setUsesHBaseChecksum(true);
HFileBlock.FSReader hbr = new FSReaderV2Test(is, totalSize, fs, path, meta);
HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
b.sanityCheck();
assertEquals(4936, b.getUncompressedSizeWithoutHeader());
@ -147,8 +161,7 @@ public class TestChecksum {
HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false);
assertEquals(false, newfs.useHBaseChecksum());
is = new FSDataInputStreamWrapper(newfs, path);
hbr = new FSReaderV2Test(is, algo,
totalSize, HFile.MAX_FORMAT_VERSION, newfs, path);
hbr = new FSReaderV2Test(is, totalSize, newfs, path, meta);
b = hbr.readBlockData(0, -1, -1, pread);
is.close();
b.sanityCheck();
@ -173,14 +186,26 @@ public class TestChecksum {
*/
@Test
public void testChecksumChunks() throws IOException {
testChecksumInternals(false);
testChecksumInternals(true);
}
protected void testChecksumInternals(boolean useTags) throws IOException {
Compression.Algorithm algo = NONE;
for (boolean pread : new boolean[] { false, true }) {
for (int bytesPerChecksum : BYTES_PER_CHECKSUM) {
Path path = new Path(TEST_UTIL.getDataTestDir(), "checksumChunk_" +
algo + bytesPerChecksum);
FSDataOutputStream os = fs.create(path);
HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null,
true, HFile.DEFAULT_CHECKSUM_TYPE, bytesPerChecksum);
HFileContext meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(true);
meta.setIncludesTags(useTags);
meta.setUsesHBaseChecksum(true);
meta.setBytesPerChecksum(bytesPerChecksum);
meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
HFileBlock.Writer hbw = new HFileBlock.Writer(null,
meta);
// write one block. The block has data
// that is at least 6 times more than the checksum chunk size
@ -211,8 +236,14 @@ public class TestChecksum {
// Read data back from file.
FSDataInputStream is = fs.open(path);
FSDataInputStream nochecksum = hfs.getNoChecksumFs().open(path);
meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(true);
meta.setIncludesTags(useTags);
meta.setUsesHBaseChecksum(true);
meta.setBytesPerChecksum(bytesPerChecksum);
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(
is, nochecksum), algo, totalSize, HFile.MAX_FORMAT_VERSION, hfs, path);
is, nochecksum), totalSize, hfs, path, meta);
HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
is.close();
b.sanityCheck();
@ -257,9 +288,9 @@ public class TestChecksum {
* checksum validations.
*/
static private class FSReaderV2Test extends HFileBlock.FSReaderV2 {
public FSReaderV2Test(FSDataInputStreamWrapper istream, Algorithm algo, long fileSize,
int minorVersion, FileSystem fs,Path path) throws IOException {
super(istream, algo, fileSize, minorVersion, (HFileSystem)fs, path);
public FSReaderV2Test(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs,
Path path, HFileContext meta) throws IOException {
super(istream, fileSize, (HFileSystem) fs, path, meta);
}
@Override

View File

@ -28,6 +28,7 @@ import java.util.Collection;
import java.util.List;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -49,12 +50,13 @@ import org.apache.hadoop.fs.Path;
public class TestFixedFileTrailer {
private static final Log LOG = LogFactory.getLog(TestFixedFileTrailer.class);
private static final int MAX_COMPARATOR_NAME_LENGTH = 128;
/**
* The number of fields used, by version. Indexed by version minus two.
* The minimum version we support is V2.
*/
private static final int[] NUM_FIELDS_BY_VERSION = new int[] { 14 };
private static final int[] NUM_FIELDS_BY_VERSION = new int[] { 14, 14 };
private HBaseTestingUtility util = new HBaseTestingUtility();
private FileSystem fs;
@ -86,7 +88,7 @@ public class TestFixedFileTrailer {
@Test
public void testTrailer() throws IOException {
FixedFileTrailer t = new FixedFileTrailer(version,
HFileBlock.MINOR_VERSION_NO_CHECKSUM);
HFileReaderV2.PBUF_TRAILER_MINOR_VERSION);
t.setDataIndexCount(3);
t.setEntryCount(((long) Integer.MAX_VALUE) + 1);
@ -119,7 +121,7 @@ public class TestFixedFileTrailer {
{
DataInputStream dis = new DataInputStream(bais);
FixedFileTrailer t2 = new FixedFileTrailer(version,
HFileBlock.MINOR_VERSION_NO_CHECKSUM);
HFileReaderV2.PBUF_TRAILER_MINOR_VERSION);
t2.deserialize(dis);
assertEquals(-1, bais.read()); // Ensure we have read everything.
checkLoadedTrailer(version, t, t2);
@ -163,6 +165,68 @@ public class TestFixedFileTrailer {
trailerStr.split(", ").length);
assertEquals(trailerStr, t4.toString());
}
@Test
public void testTrailerForV2NonPBCompatibility() throws Exception {
if (version == 2) {
FixedFileTrailer t = new FixedFileTrailer(version,
HFileReaderV2.MINOR_VERSION_NO_CHECKSUM);
t.setDataIndexCount(3);
t.setEntryCount(((long) Integer.MAX_VALUE) + 1);
t.setLastDataBlockOffset(291);
t.setNumDataIndexLevels(3);
t.setComparatorClass(KeyValue.COMPARATOR.getClass());
t.setFirstDataBlockOffset(9081723123L); // Completely unrealistic.
t.setUncompressedDataIndexSize(827398717L); // Something random.
t.setLoadOnOpenOffset(128);
t.setMetaIndexCount(7);
t.setTotalUncompressedBytes(129731987);
{
DataOutputStream dos = new DataOutputStream(baos); // Limited scope.
serializeAsWritable(dos, t);
dos.flush();
assertEquals(FixedFileTrailer.getTrailerSize(version), dos.size());
}
byte[] bytes = baos.toByteArray();
baos.reset();
assertEquals(bytes.length, FixedFileTrailer.getTrailerSize(version));
ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
{
DataInputStream dis = new DataInputStream(bais);
FixedFileTrailer t2 = new FixedFileTrailer(version,
HFileReaderV2.MINOR_VERSION_NO_CHECKSUM);
t2.deserialize(dis);
assertEquals(-1, bais.read()); // Ensure we have read everything.
checkLoadedTrailer(version, t, t2);
}
}
}
// Copied from FixedFileTrailer for testing the reading of non-PB serialized FFTs.
private void serializeAsWritable(DataOutputStream output, FixedFileTrailer fft)
throws IOException {
BlockType.TRAILER.write(output);
output.writeLong(fft.getFileInfoOffset());
output.writeLong(fft.getLoadOnOpenDataOffset());
output.writeInt(fft.getDataIndexCount());
output.writeLong(fft.getUncompressedDataIndexSize());
output.writeInt(fft.getMetaIndexCount());
output.writeLong(fft.getTotalUncompressedBytes());
output.writeLong(fft.getEntryCount());
output.writeInt(fft.getCompressionCodec().ordinal());
output.writeInt(fft.getNumDataIndexLevels());
output.writeLong(fft.getFirstDataBlockOffset());
output.writeLong(fft.getLastDataBlockOffset());
Bytes.writeStringFixedSize(output, fft.getComparatorClassName(), MAX_COMPARATOR_NAME_LENGTH);
output.writeInt(FixedFileTrailer.materializeVersion(fft.getMajorVersion(),
fft.getMinorVersion()));
}
private FixedFileTrailer readTrailer(Path trailerPath) throws IOException {
FSDataInputStream fsdis = fs.open(trailerPath);

View File

@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.HBaseTestCase;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.SmallTests;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
@ -82,8 +83,10 @@ public class TestHFile extends HBaseTestCase {
public void testEmptyHFile() throws IOException {
if (cacheConf == null) cacheConf = new CacheConfig(conf);
Path f = new Path(ROOT_DIR, getName());
HFileContext context = new HFileContext();
context.setIncludesTags(false);
Writer w =
HFile.getWriterFactory(conf, cacheConf).withPath(fs, f).create();
HFile.getWriterFactory(conf, cacheConf).withPath(fs, f).withFileContext(context).create();
w.close();
Reader r = HFile.createReader(fs, f, cacheConf);
r.loadFileInfo();
@ -130,8 +133,10 @@ public class TestHFile extends HBaseTestCase {
public void testCorruptTruncatedHFile() throws IOException {
if (cacheConf == null) cacheConf = new CacheConfig(conf);
Path f = new Path(ROOT_DIR, getName());
Writer w = HFile.getWriterFactory(conf, cacheConf).withPath(this.fs, f).create();
writeSomeRecords(w, 0, 100);
HFileContext context = new HFileContext();
Writer w = HFile.getWriterFactory(conf, cacheConf).withPath(this.fs, f)
.withFileContext(context).create();
writeSomeRecords(w, 0, 100, false);
w.close();
Path trunc = new Path(f.getParent(), "trucated");
@ -148,12 +153,17 @@ public class TestHFile extends HBaseTestCase {
// write some records into the tfile
// write them twice
private int writeSomeRecords(Writer writer, int start, int n)
private int writeSomeRecords(Writer writer, int start, int n, boolean useTags)
throws IOException {
String value = "value";
for (int i = start; i < (start + n); i++) {
String key = String.format(localFormatter, Integer.valueOf(i));
writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key));
if (useTags) {
Tag t = new Tag((byte) 1, "myTag1");
writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key), t.getBuffer());
} else {
writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key));
}
}
return (start + n);
}
@ -192,8 +202,8 @@ public class TestHFile extends HBaseTestCase {
return String.format(localFormatter, Integer.valueOf(rowId)).getBytes();
}
private void writeRecords(Writer writer) throws IOException {
writeSomeRecords(writer, 0, 100);
private void writeRecords(Writer writer, boolean useTags) throws IOException {
writeSomeRecords(writer, 0, 100, useTags);
writer.close();
}
@ -205,20 +215,26 @@ public class TestHFile extends HBaseTestCase {
/**
* test none codecs
* @param useTags
*/
void basicWithSomeCodec(String codec) throws IOException {
void basicWithSomeCodec(String codec, boolean useTags) throws IOException {
if (useTags) {
conf.setInt("hfile.format.version", 3);
}
if (cacheConf == null) cacheConf = new CacheConfig(conf);
Path ncTFile = new Path(ROOT_DIR, "basic.hfile." + codec.toString());
Path ncTFile = new Path(ROOT_DIR, "basic.hfile." + codec.toString() + useTags);
FSDataOutputStream fout = createFSOutput(ncTFile);
HFileContext meta = new HFileContext();
meta.setBlocksize(minBlockSize);
meta.setCompressAlgo(AbstractHFileWriter.compressionByName(codec));
Writer writer = HFile.getWriterFactory(conf, cacheConf)
.withOutputStream(fout)
.withBlockSize(minBlockSize)
.withCompression(codec)
.withFileContext(meta)
// NOTE: This test is dependent on this deprecated nonstandard comparator
.withComparator(new KeyValue.RawBytesComparator())
.create();
LOG.info(writer);
writeRecords(writer);
writeRecords(writer, useTags);
fout.close();
FSDataInputStream fin = fs.open(ncTFile);
Reader reader = HFile.createReaderFromStream(ncTFile, fs.open(ncTFile),
@ -250,8 +266,13 @@ public class TestHFile extends HBaseTestCase {
}
public void testTFileFeatures() throws IOException {
basicWithSomeCodec("none");
basicWithSomeCodec("gz");
testTFilefeaturesInternals(false);
testTFilefeaturesInternals(true);
}
protected void testTFilefeaturesInternals(boolean useTags) throws IOException {
basicWithSomeCodec("none", useTags);
basicWithSomeCodec("gz", useTags);
}
private void writeNumMetablocks(Writer writer, int n) {
@ -292,10 +313,12 @@ public class TestHFile extends HBaseTestCase {
if (cacheConf == null) cacheConf = new CacheConfig(conf);
Path mFile = new Path(ROOT_DIR, "meta.hfile");
FSDataOutputStream fout = createFSOutput(mFile);
HFileContext meta = new HFileContext();
meta.setCompressAlgo(AbstractHFileWriter.compressionByName(compress));
meta.setBlocksize(minBlockSize);
Writer writer = HFile.getWriterFactory(conf, cacheConf)
.withOutputStream(fout)
.withBlockSize(minBlockSize)
.withCompression(compress)
.withFileContext(meta)
.create();
someTestingWithMetaBlock(writer);
writer.close();
@ -324,10 +347,12 @@ public class TestHFile extends HBaseTestCase {
HBaseTestingUtility.COMPRESSION_ALGORITHMS) {
Path mFile = new Path(ROOT_DIR, "nometa_" + compressAlgo + ".hfile");
FSDataOutputStream fout = createFSOutput(mFile);
HFileContext meta = new HFileContext();
meta.setCompressAlgo((compressAlgo));
meta.setBlocksize(minBlockSize);
Writer writer = HFile.getWriterFactory(conf, cacheConf)
.withOutputStream(fout)
.withBlockSize(minBlockSize)
.withCompression(compressAlgo)
.withFileContext(meta)
.create();
writer.append("foo".getBytes(), "value".getBytes());
writer.close();

View File

@ -18,7 +18,11 @@
*/
package org.apache.hadoop.hbase.io.hfile;
import static org.junit.Assert.*;
import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.GZ;
import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.NONE;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
@ -33,6 +37,7 @@ import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Executors;
@ -46,8 +51,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.DoubleOutputStream;
import org.apache.hadoop.hbase.io.compress.Compression;
@ -61,9 +67,6 @@ import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.compress.Compressor;
import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.*;
import org.junit.Before;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@ -97,14 +100,15 @@ public class TestHFileBlock {
private int uncompressedSizeV1;
private final boolean includesMemstoreTS;
public TestHFileBlock(boolean includesMemstoreTS) {
private final boolean includesTag;
public TestHFileBlock(boolean includesMemstoreTS, boolean includesTag) {
this.includesMemstoreTS = includesMemstoreTS;
this.includesTag = includesTag;
}
@Parameters
public static Collection<Object[]> parameters() {
return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
}
@Before
@ -118,7 +122,7 @@ public class TestHFileBlock {
dos.writeInt(i / 100);
}
static int writeTestKeyValues(OutputStream dos, int seed, boolean includesMemstoreTS)
static int writeTestKeyValues(OutputStream dos, int seed, boolean includesMemstoreTS, boolean useTag)
throws IOException {
List<KeyValue> keyValues = new ArrayList<KeyValue>();
Random randomizer = new Random(42l + seed); // just any fixed number
@ -163,24 +167,37 @@ public class TestHFileBlock {
} else {
timestamp = randomizer.nextLong();
}
keyValues.add(new KeyValue(row, family, qualifier, timestamp, value));
if (!useTag) {
keyValues.add(new KeyValue(row, family, qualifier, timestamp, value));
} else {
keyValues.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] { new Tag(
(byte) 1, Bytes.toBytes("myTagVal")) }));
}
}
// sort it and write to stream
int totalSize = 0;
Collections.sort(keyValues, KeyValue.COMPARATOR);
Collections.sort(keyValues, KeyValue.COMPARATOR);
DataOutputStream dataOutputStream = new DataOutputStream(dos);
for (KeyValue kv : keyValues) {
dataOutputStream.writeInt(kv.getKeyLength());
dataOutputStream.writeInt(kv.getValueLength());
dataOutputStream.write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength());
dataOutputStream.write(kv.getBuffer(), kv.getValueOffset(), kv.getValueLength());
// Write the additional tag into the stream
// always write the tag length
totalSize += kv.getLength();
dataOutputStream.write(kv.getBuffer(), kv.getOffset(), kv.getLength());
if (useTag) {
dataOutputStream.writeShort(kv.getTagsLength());
dataOutputStream.write(kv.getBuffer(), kv.getTagsOffset(), kv.getTagsLength());
}
if (includesMemstoreTS) {
long memstoreTS = randomizer.nextLong();
WritableUtils.writeVLong(dataOutputStream, memstoreTS);
totalSize += WritableUtils.getVIntSize(memstoreTS);
}
}
return totalSize;
}
@ -199,11 +216,15 @@ public class TestHFileBlock {
}
static HFileBlock.Writer createTestV2Block(Compression.Algorithm algo,
boolean includesMemstoreTS) throws IOException {
boolean includesMemstoreTS, boolean includesTag) throws IOException {
final BlockType blockType = BlockType.DATA;
HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null,
includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE,
HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileContext meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
DataOutputStream dos = hbw.startWriting(blockType);
writeTestBlockContents(dos);
dos.flush();
@ -214,8 +235,8 @@ public class TestHFileBlock {
}
public String createTestBlockStr(Compression.Algorithm algo,
int correctLength) throws IOException {
HFileBlock.Writer hbw = createTestV2Block(algo, includesMemstoreTS);
int correctLength, boolean useTag) throws IOException {
HFileBlock.Writer hbw = createTestV2Block(algo, includesMemstoreTS, useTag);
byte[] testV2Block = hbw.getHeaderAndDataForTest();
int osOffset = HConstants.HFILEBLOCK_HEADER_SIZE + 9;
if (testV2Block.length == correctLength) {
@ -231,7 +252,7 @@ public class TestHFileBlock {
@Test
public void testNoCompression() throws IOException {
assertEquals(4000, createTestV2Block(NONE, includesMemstoreTS).
assertEquals(4000, createTestV2Block(NONE, includesMemstoreTS, false).
getBlockForCaching().getUncompressedSizeWithoutHeader());
}
@ -257,7 +278,7 @@ public class TestHFileBlock {
+ "\\xD46\\xEA5\\xEA3\\xEA7\\xE7\\x00LI\\x5Cs\\xA0\\x0F\\x00\\x00"
+ "\\x00\\x00\\x00\\x00"; // 4 byte checksum (ignored)
final int correctGzipBlockLength = 95;
final String testBlockStr = createTestBlockStr(GZ, correctGzipBlockLength);
final String testBlockStr = createTestBlockStr(GZ, correctGzipBlockLength, false);
// We ignore the block checksum because createTestBlockStr can change the
// gzip header after the block is produced
assertEquals(correctTestBlockStr.substring(0, correctGzipBlockLength - 4),
@ -266,6 +287,13 @@ public class TestHFileBlock {
@Test
public void testReaderV2() throws IOException {
testReaderV2Internals();
}
protected void testReaderV2Internals() throws IOException {
if(includesTag) {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
}
for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
for (boolean pread : new boolean[] { false, true }) {
LOG.info("testReaderV2: Compression algorithm: " + algo +
@ -273,9 +301,14 @@ public class TestHFileBlock {
Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_"
+ algo);
FSDataOutputStream os = fs.create(path);
HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null,
includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE,
HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileContext meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
HFileBlock.Writer hbw = new HFileBlock.Writer(null,
meta);
long totalSize = 0;
for (int blockId = 0; blockId < 2; ++blockId) {
DataOutputStream dos = hbw.startWriting(BlockType.DATA);
@ -287,8 +320,12 @@ public class TestHFileBlock {
os.close();
FSDataInputStream is = fs.open(path);
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
totalSize);
meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(algo);
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, totalSize, meta);
HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
is.close();
assertEquals(0, HFile.getChecksumFailuresCount());
@ -301,7 +338,7 @@ public class TestHFileBlock {
if (algo == GZ) {
is = fs.open(path);
hbr = new HFileBlock.FSReaderV2(is, algo, totalSize);
hbr = new HFileBlock.FSReaderV2(is, totalSize, meta);
b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE +
b.totalChecksumBytes(), -1, pread);
assertEquals(blockStr, b.toString());
@ -330,7 +367,14 @@ public class TestHFileBlock {
*/
@Test
public void testDataBlockEncoding() throws IOException {
testInternals();
}
private void testInternals() throws IOException {
final int numBlocks = 5;
if(includesTag) {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
}
for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
for (boolean pread : new boolean[] { false, true }) {
for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
@ -339,27 +383,35 @@ public class TestHFileBlock {
FSDataOutputStream os = fs.create(path);
HFileDataBlockEncoder dataBlockEncoder =
new HFileDataBlockEncoderImpl(encoding);
HFileBlock.Writer hbw = new HFileBlock.Writer(algo, dataBlockEncoder,
includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE,
HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileContext meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
HFileBlock.Writer hbw = new HFileBlock.Writer(dataBlockEncoder,
meta);
long totalSize = 0;
final List<Integer> encodedSizes = new ArrayList<Integer>();
final List<ByteBuffer> encodedBlocks = new ArrayList<ByteBuffer>();
for (int blockId = 0; blockId < numBlocks; ++blockId) {
DataOutputStream dos = hbw.startWriting(BlockType.DATA);
writeEncodedBlock(algo, encoding, dos, encodedSizes, encodedBlocks,
blockId, includesMemstoreTS, HConstants.HFILEBLOCK_DUMMY_HEADER);
blockId, includesMemstoreTS, HConstants.HFILEBLOCK_DUMMY_HEADER, includesTag);
hbw.writeHeaderAndData(os);
totalSize += hbw.getOnDiskSizeWithHeader();
}
os.close();
FSDataInputStream is = fs.open(path);
HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(is, algo,
totalSize);
meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(algo);
HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(is, totalSize, meta);
hbr.setDataBlockEncoder(dataBlockEncoder);
hbr.setIncludesMemstoreTS(includesMemstoreTS);
HFileBlock b;
int pos = 0;
for (int blockId = 0; blockId < numBlocks; ++blockId) {
@ -393,28 +445,31 @@ public class TestHFileBlock {
static void writeEncodedBlock(Algorithm algo, DataBlockEncoding encoding,
DataOutputStream dos, final List<Integer> encodedSizes,
final List<ByteBuffer> encodedBlocks, int blockId,
boolean includesMemstoreTS, byte[] dummyHeader) throws IOException {
final List<ByteBuffer> encodedBlocks, int blockId,
boolean includesMemstoreTS, byte[] dummyHeader, boolean useTag) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DoubleOutputStream doubleOutputStream =
new DoubleOutputStream(dos, baos);
writeTestKeyValues(doubleOutputStream, blockId, includesMemstoreTS);
writeTestKeyValues(doubleOutputStream, blockId, includesMemstoreTS, useTag);
ByteBuffer rawBuf = ByteBuffer.wrap(baos.toByteArray());
rawBuf.rewind();
DataBlockEncoder encoder = encoding.getEncoder();
int headerLen = dummyHeader.length;
byte[] encodedResultWithHeader = null;
HFileContext meta = new HFileContext();
meta.setCompressAlgo(algo);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(useTag);
if (encoder != null) {
HFileBlockEncodingContext encodingCtx =
encoder.newDataBlockEncodingContext(algo, encoding, dummyHeader);
encoder.encodeKeyValues(rawBuf, includesMemstoreTS,
encodingCtx);
HFileBlockEncodingContext encodingCtx = encoder.newDataBlockEncodingContext(encoding,
dummyHeader, meta);
encoder.encodeKeyValues(rawBuf, encodingCtx);
encodedResultWithHeader =
encodingCtx.getUncompressedBytesWithHeader();
} else {
HFileBlockDefaultEncodingContext defaultEncodingCtx =
new HFileBlockDefaultEncodingContext(algo, encoding, dummyHeader);
HFileBlockDefaultEncodingContext defaultEncodingCtx = new HFileBlockDefaultEncodingContext(
encoding, dummyHeader, meta);
byte[] rawBufWithHeader =
new byte[rawBuf.array().length + headerLen];
System.arraycopy(rawBuf.array(), 0, rawBufWithHeader,
@ -474,6 +529,10 @@ public class TestHFileBlock {
@Test
public void testPreviousOffset() throws IOException {
testPreviousOffsetInternals();
}
protected void testPreviousOffsetInternals() throws IOException {
for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
for (boolean pread : BOOLEAN_VALUES) {
for (boolean cacheOnWrite : BOOLEAN_VALUES) {
@ -491,8 +550,12 @@ public class TestHFileBlock {
expectedPrevOffsets, expectedTypes, expectedContents);
FSDataInputStream is = fs.open(path);
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo,
totalSize);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(algo);
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, totalSize, meta);
long curOffset = 0;
for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
if (!pread) {
@ -656,6 +719,11 @@ public class TestHFileBlock {
@Test
public void testConcurrentReading() throws Exception {
testConcurrentReadingInternals();
}
protected void testConcurrentReadingInternals() throws IOException,
InterruptedException, ExecutionException {
for (Compression.Algorithm compressAlgo : COMPRESSION_ALGORITHMS) {
Path path =
new Path(TEST_UTIL.getDataTestDir(), "concurrent_reading");
@ -665,8 +733,12 @@ public class TestHFileBlock {
writeBlocks(rand, compressAlgo, path, offsets, null, types, null);
FSDataInputStream is = fs.open(path);
long fileSize = fs.getFileStatus(path).getLen();
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, compressAlgo,
fileSize);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(compressAlgo);
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, fileSize, meta);
Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS);
ExecutorCompletionService<Boolean> ecs =
@ -697,9 +769,14 @@ public class TestHFileBlock {
) throws IOException {
boolean cacheOnWrite = expectedContents != null;
FSDataOutputStream os = fs.create(path);
HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo, null,
includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE,
HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(compressAlgo);
meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta);
Map<BlockType, Long> prevOffsetByType = new HashMap<BlockType, Long>();
long totalSize = 0;
for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
@ -749,6 +826,10 @@ public class TestHFileBlock {
@Test
public void testBlockHeapSize() {
testBlockHeapSizeInternals();
}
protected void testBlockHeapSizeInternals() {
if (ClassSize.is32BitJVM()) {
assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 64);
} else {
@ -758,16 +839,24 @@ public class TestHFileBlock {
for (int size : new int[] { 100, 256, 12345 }) {
byte[] byteArr = new byte[HConstants.HFILEBLOCK_HEADER_SIZE + size];
ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size);
HFileContext meta = new HFileContext();
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setUsesHBaseChecksum(false);
meta.setCompressAlgo(Algorithm.NONE);
meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
meta.setChecksumType(ChecksumType.NULL);
meta.setBytesPerChecksum(0);
HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf,
HFileBlock.FILL_HEADER, -1, includesMemstoreTS,
HFileBlock.MINOR_VERSION_NO_CHECKSUM, 0, ChecksumType.NULL.getCode(),
0);
HFileBlock.FILL_HEADER, -1,
0, meta);
long byteBufferExpectedSize =
ClassSize.align(ClassSize.estimateBase(buf.getClass(), true)
+ HConstants.HFILEBLOCK_HEADER_SIZE + size);
long hfileMetaSize = ClassSize.align(ClassSize.estimateBase(HFileContext.class, true));
long hfileBlockExpectedSize =
ClassSize.align(ClassSize.estimateBase(HFileBlock.class, true));
long expected = hfileBlockExpectedSize + byteBufferExpectedSize;
long expected = hfileBlockExpectedSize + byteBufferExpectedSize + hfileMetaSize;
assertEquals("Block data size: " + size + ", byte buffer expected " +
"size: " + byteBufferExpectedSize + ", HFileBlock class expected " +
"size: " + hfileBlockExpectedSize + ";", expected,

View File

@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.util.Bytes;
@ -84,14 +85,16 @@ public class TestHFileBlockCompatibility {
private int uncompressedSizeV1;
private final boolean includesMemstoreTS;
private final boolean includesTag;
public TestHFileBlockCompatibility(boolean includesMemstoreTS) {
public TestHFileBlockCompatibility(boolean includesMemstoreTS, boolean includesTag) {
this.includesMemstoreTS = includesMemstoreTS;
this.includesTag = includesTag;
}
@Parameters
public static Collection<Object[]> parameters() {
return HBaseTestingUtility.BOOLEAN_PARAMETERIZED;
return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED;
}
@Before
@ -117,7 +120,7 @@ public class TestHFileBlockCompatibility {
throws IOException {
final BlockType blockType = BlockType.DATA;
Writer hbw = new Writer(algo, null,
includesMemstoreTS);
includesMemstoreTS, includesTag);
DataOutputStream dos = hbw.startWriting(blockType);
TestHFileBlock.writeTestBlockContents(dos);
// make sure the block is ready by calling hbw.getHeaderAndData()
@ -144,7 +147,7 @@ public class TestHFileBlockCompatibility {
@Test
public void testNoCompression() throws IOException {
assertEquals(4000, createTestV2Block(NONE).getBlockForCaching().
getUncompressedSizeWithoutHeader());
getUncompressedSizeWithoutHeader());
}
@Test
@ -172,6 +175,9 @@ public class TestHFileBlockCompatibility {
@Test
public void testReaderV2() throws IOException {
if(includesTag) {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
}
for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
for (boolean pread : new boolean[] { false, true }) {
LOG.info("testReaderV2: Compression algorithm: " + algo +
@ -180,7 +186,7 @@ public class TestHFileBlockCompatibility {
+ algo);
FSDataOutputStream os = fs.create(path);
Writer hbw = new Writer(algo, null,
includesMemstoreTS);
includesMemstoreTS, includesTag);
long totalSize = 0;
for (int blockId = 0; blockId < 2; ++blockId) {
DataOutputStream dos = hbw.startWriting(BlockType.DATA);
@ -192,8 +198,13 @@ public class TestHFileBlockCompatibility {
os.close();
FSDataInputStream is = fs.open(path);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(algo);
HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is),
algo, totalSize, MINOR_VERSION, fs, path);
totalSize, fs, path, meta);
HFileBlock b = hbr.readBlockData(0, -1, -1, pread);
is.close();
@ -205,8 +216,8 @@ public class TestHFileBlockCompatibility {
if (algo == GZ) {
is = fs.open(path);
hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is),
algo, totalSize, MINOR_VERSION, fs, path);
hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is), totalSize, fs, path,
meta);
b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM +
b.totalChecksumBytes(), -1, pread);
assertEquals(blockStr, b.toString());
@ -235,6 +246,9 @@ public class TestHFileBlockCompatibility {
*/
@Test
public void testDataBlockEncoding() throws IOException {
if(includesTag) {
TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3);
}
final int numBlocks = 5;
for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
for (boolean pread : new boolean[] { false, true }) {
@ -250,7 +264,7 @@ public class TestHFileBlockCompatibility {
TestHFileBlockCompatibility.Writer.DUMMY_HEADER);
TestHFileBlockCompatibility.Writer hbw =
new TestHFileBlockCompatibility.Writer(algo,
dataBlockEncoder, includesMemstoreTS);
dataBlockEncoder, includesMemstoreTS, includesTag);
long totalSize = 0;
final List<Integer> encodedSizes = new ArrayList<Integer>();
final List<ByteBuffer> encodedBlocks = new ArrayList<ByteBuffer>();
@ -258,7 +272,7 @@ public class TestHFileBlockCompatibility {
DataOutputStream dos = hbw.startWriting(BlockType.DATA);
TestHFileBlock.writeEncodedBlock(algo, encoding, dos, encodedSizes,
encodedBlocks, blockId, includesMemstoreTS,
TestHFileBlockCompatibility.Writer.DUMMY_HEADER);
TestHFileBlockCompatibility.Writer.DUMMY_HEADER, includesTag);
hbw.writeHeaderAndData(os);
totalSize += hbw.getOnDiskSizeWithHeader();
@ -266,8 +280,13 @@ public class TestHFileBlockCompatibility {
os.close();
FSDataInputStream is = fs.open(path);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(algo);
HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is),
algo, totalSize, MINOR_VERSION, fs, path);
totalSize, fs, path, meta);
hbr.setDataBlockEncoder(dataBlockEncoder);
hbr.setIncludesMemstoreTS(includesMemstoreTS);
@ -301,9 +320,6 @@ public class TestHFileBlockCompatibility {
}
}
}
/**
* This is the version of the HFileBlock.Writer that is used to
* create V2 blocks with minor version 0. These blocks do not
@ -392,33 +408,34 @@ public class TestHFileBlockCompatibility {
/** The offset of the previous block of the same type */
private long prevOffset;
/** Whether we are including memstore timestamp after every key/value */
private boolean includesMemstoreTS;
private HFileContext meta;
/**
* @param compressionAlgorithm compression algorithm to use
* @param dataBlockEncoderAlgo data block encoding algorithm to use
*/
public Writer(Compression.Algorithm compressionAlgorithm,
HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS) {
HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS, boolean includesTag) {
compressAlgo = compressionAlgorithm == null ? NONE : compressionAlgorithm;
this.dataBlockEncoder = dataBlockEncoder != null
? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE;
defaultBlockEncodingCtx =
new HFileBlockDefaultEncodingContext(compressionAlgorithm,
null, DUMMY_HEADER);
meta = new HFileContext();
meta.setUsesHBaseChecksum(false);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(includesTag);
meta.setCompressAlgo(compressionAlgorithm);
defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null, DUMMY_HEADER, meta);
dataBlockEncodingCtx =
this.dataBlockEncoder.newOnDiskDataBlockEncodingContext(
compressionAlgorithm, DUMMY_HEADER);
this.dataBlockEncoder.newOnDiskDataBlockEncodingContext(
DUMMY_HEADER, meta);
baosInMemory = new ByteArrayOutputStream();
prevOffsetByType = new long[BlockType.values().length];
for (int i = 0; i < prevOffsetByType.length; ++i)
prevOffsetByType[i] = -1;
this.includesMemstoreTS = includesMemstoreTS;
}
/**
@ -521,8 +538,7 @@ public class TestHFileBlockCompatibility {
uncompressedBytesWithHeader.length - HEADER_SIZE).slice();
//do the encoding
dataBlockEncoder.beforeWriteToDisk(rawKeyValues,
includesMemstoreTS, dataBlockEncodingCtx, blockType);
dataBlockEncoder.beforeWriteToDisk(rawKeyValues, dataBlockEncodingCtx, blockType);
uncompressedBytesWithHeader =
dataBlockEncodingCtx.getUncompressedBytesWithHeader();
@ -714,11 +730,13 @@ public class TestHFileBlockCompatibility {
* Creates a new HFileBlock.
*/
public HFileBlock getBlockForCaching() {
meta.setUsesHBaseChecksum(false);
meta.setChecksumType(ChecksumType.NULL);
meta.setBytesPerChecksum(0);
return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(),
getUncompressedSizeWithoutHeader(), prevOffset,
getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset,
includesMemstoreTS, MINOR_VERSION, 0, ChecksumType.NULL.getCode(),
getOnDiskSizeWithoutHeader());
getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset,
getOnDiskSizeWithoutHeader(), meta);
}
}
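The HFileContext setup above recurs throughout these block tests; the following condensed sketch (illustrative only; is and totalSize stand for an already-open FSDataInputStream and the size of the data written) collects the writer-side and reader-side configuration in one place, using only setters that appear in this patch:
    // Writer side: describe how blocks are to be written (compression, mvcc, tags, checksums).
    HFileContext writeCtx = new HFileContext();
    writeCtx.setCompressAlgo(Compression.Algorithm.NONE);
    writeCtx.setIncludesMvcc(true);          // keep memstore timestamps
    writeCtx.setIncludesTags(true);          // KVs may carry tags (HFile v3)
    writeCtx.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
    writeCtx.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
    HFileBlock.Writer writer = new HFileBlock.Writer(null, writeCtx);
    // Reader side: must agree with what was written, plus the checksum mode in use.
    HFileContext readCtx = new HFileContext();
    readCtx.setCompressAlgo(Compression.Algorithm.NONE);
    readCtx.setIncludesMvcc(true);
    readCtx.setIncludesTags(true);
    readCtx.setUsesHBaseChecksum(true);
    HFileBlock.FSReader reader = new HFileBlock.FSReaderV2(is, totalSize, readCtx);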

View File

@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.MediumTests;
import org.apache.hadoop.hbase.fs.HFileSystem;
@ -118,9 +119,27 @@ public class TestHFileBlockIndex {
@Test
public void testBlockIndex() throws IOException {
path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr);
writeWholeIndex();
readIndex();
testBlockIndexInternals(false);
clear();
testBlockIndexInternals(true);
}
private void clear() throws IOException {
keys.clear();
rand = new Random(2389757);
firstKeyInFile = null;
conf = TEST_UTIL.getConfiguration();
// This test requires at least HFile format version 2.
conf.setInt(HFile.FORMAT_VERSION_KEY, 3);
fs = HFileSystem.get(conf);
}
protected void testBlockIndexInternals(boolean useTags) throws IOException {
path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags);
writeWholeIndex(useTags);
readIndex(useTags);
}
/**
@ -164,13 +183,18 @@ public class TestHFileBlockIndex {
}
}
public void readIndex() throws IOException {
public void readIndex(boolean useTags) throws IOException {
long fileSize = fs.getFileStatus(path).getLen();
LOG.info("Size of " + path + ": " + fileSize);
FSDataInputStream istream = fs.open(path);
HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream,
compr, fs.getFileStatus(path).getLen());
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(useTags);
meta.setCompressAlgo(compr);
HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream, fs.getFileStatus(path)
.getLen(), meta);
BlockReaderWrapper brw = new BlockReaderWrapper(blockReader);
HFileBlockIndex.BlockIndexReader indexReader =
@ -215,11 +239,17 @@ public class TestHFileBlockIndex {
istream.close();
}
private void writeWholeIndex() throws IOException {
private void writeWholeIndex(boolean useTags) throws IOException {
assertEquals(0, keys.size());
HFileBlock.Writer hbw = new HFileBlock.Writer(compr, null,
includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE,
HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileContext meta = new HFileContext();
meta.setUsesHBaseChecksum(true);
meta.setIncludesMvcc(includesMemstoreTS);
meta.setIncludesTags(useTags);
meta.setCompressAlgo(compr);
meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);
meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM);
HFileBlock.Writer hbw = new HFileBlock.Writer(null,
meta);
FSDataOutputStream outputStream = fs.create(path);
HFileBlockIndex.BlockIndexWriter biw =
new HFileBlockIndex.BlockIndexWriter(hbw, null, null);
@ -486,11 +516,13 @@ public class TestHFileBlockIndex {
// Write the HFile
{
HFileContext meta = new HFileContext();
meta.setBlocksize(SMALL_BLOCK_SIZE);
meta.setCompressAlgo(compr);
HFile.Writer writer =
HFile.getWriterFactory(conf, cacheConf)
.withPath(fs, hfilePath)
.withBlockSize(SMALL_BLOCK_SIZE)
.withCompression(compr)
.withFileContext(meta)
.create();
Random rand = new Random(19231737);
@ -502,7 +534,7 @@ public class TestHFileBlockIndex {
row, 0, 0).getKey();
byte[] v = TestHFileWriterV2.randomValue(rand);
writer.append(k, v);
writer.append(k, v, HConstants.EMPTY_BYTE_ARRAY);
keys[i] = k;
values[i] = v;
keyStrSet.add(Bytes.toStringBinary(k));

Some files were not shown because too many files have changed in this diff.