diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java index 50282846cbe..63254a2613e 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; @@ -96,7 +97,7 @@ public abstract class Mutation extends OperationWithAttributes implements Row, C } /* - * Create a nnnnnnnn with this objects row key and the Put identifier. + * Create a KeyValue with this objects row key and the Put identifier. * * @return a KeyValue with this objects row key and the Put identifier. */ @@ -104,6 +105,20 @@ public abstract class Mutation extends OperationWithAttributes implements Row, C return new KeyValue(this.row, family, qualifier, ts, KeyValue.Type.Put, value); } + /** + * Create a KeyValue with this objects row key and the Put identifier. + * @param family + * @param qualifier + * @param ts + * @param value + * @param tags - Specify the Tags as an Array {@link KeyValue.Tag} + * @return a KeyValue with this objects row key and the Put identifier. + */ + KeyValue createPutKeyValue(byte[] family, byte[] qualifier, long ts, byte[] value, Tag[] tags) { + KeyValue kvWithTag = new KeyValue(this.row, family, qualifier, ts, value, tags); + return kvWithTag; + } + /** * Compile the column family (i.e. schema) information * into a Map. Useful for parsing and aggregation by debugging, diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java index 3a1fb70d669..41ec446f7f1 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.Bytes; @@ -112,6 +113,10 @@ public class Put extends Mutation implements HeapSize, Comparable { return add(family, qualifier, this.ts, value); } + public Put add(byte[] family, byte [] qualifier, byte [] value, Tag[] tag) { + return add(family, qualifier, this.ts, value, tag); + } + /** * Add the specified column and value, with the specified timestamp as * its version to this Put operation. @@ -132,6 +137,18 @@ public class Put extends Mutation implements HeapSize, Comparable { return this; } + /** + * Forms a keyvalue with tags + */ + @SuppressWarnings("unchecked") + public Put add(byte[] family, byte[] qualifier, long ts, byte[] value, Tag[] tag) { + List list = getCellList(family); + KeyValue kv = createPutKeyValue(family, qualifier, ts, value, tag); + list.add(kv); + familyMap.put(kv.getFamily(), list); + return this; + } + /** * Add the specified KeyValue to this Put operation. 
Operation assumes that * the passed KeyValue is immutable and its backing array will not be modified diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java index be33fdb58d7..45a753ed732 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java @@ -40,7 +40,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; @@ -50,6 +49,8 @@ import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.client.Append; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Durability; @@ -99,12 +100,12 @@ import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.Col import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ScanRequest; +import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad; import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos; import org.apache.hadoop.hbase.protobuf.generated.FilterProtos; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.NameBytesPair; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo; -import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos.RegionLoad; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType; import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos; @@ -471,7 +472,18 @@ public final class ProtobufUtil { if (qv.hasTimestamp()) { ts = qv.getTimestamp(); } - put.add(family, qualifier, ts, value); + byte[] tags; + if (qv.hasTags()) { + tags = qv.getTags().toByteArray(); + Object[] array = Tag.createTags(tags, 0, (short)tags.length).toArray(); + Tag[] tagArray = new Tag[array.length]; + for(int i = 0; i< array.length; i++) { + tagArray[i] = (Tag)array[i]; + } + put.add(family, qualifier, ts, value, tagArray); + } else { + put.add(family, qualifier, ts, value); + } } } } @@ -972,6 +984,9 @@ public final class ProtobufUtil { valueBuilder.setQualifier(ByteString.copyFrom(kv.getQualifier())); valueBuilder.setValue(ByteString.copyFrom(kv.getValue())); valueBuilder.setTimestamp(kv.getTimestamp()); + if(cell.getTagsLength() > 0) { + valueBuilder.setTags(ByteString.copyFrom(CellUtil.getTagArray(kv))); + } if (type == MutationType.DELETE) { KeyValue.Type keyValueType = KeyValue.Type.codeToType(kv.getType()); valueBuilder.setDeleteType(toDeleteType(keyValueType)); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java index 
7728b7d211a..5286d2aa17b 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellUtil.java @@ -53,6 +53,9 @@ public final class CellUtil { cell.getQualifierLength()); } + public static ByteRange fillTagRange(Cell cell, ByteRange range) { + return range.set(cell.getTagsArray(), cell.getTagsOffset(), cell.getTagsLength()); + } /***************** get individual arrays for tests ************/ @@ -79,6 +82,12 @@ public final class CellUtil { copyValueTo(cell, output, 0); return output; } + + public static byte[] getTagArray(Cell cell){ + byte[] output = new byte[cell.getTagsLength()]; + copyTagTo(cell, output, 0); + return output; + } /******************** copyTo **********************************/ @@ -103,10 +112,22 @@ public final class CellUtil { public static int copyValueTo(Cell cell, byte[] destination, int destinationOffset) { System.arraycopy(cell.getValueArray(), cell.getValueOffset(), destination, destinationOffset, - cell.getValueLength()); + cell.getValueLength()); return destinationOffset + cell.getValueLength(); } + /** + * Copies the tags info into the tag portion of the cell + * @param cell + * @param destination + * @param destinationOffset + * @return position after tags + */ + public static int copyTagTo(Cell cell, byte[] destination, int destinationOffset) { + System.arraycopy(cell.getTagsArray(), cell.getTagsOffset(), destination, destinationOffset, + cell.getTagsLength()); + return destinationOffset + cell.getTagsLength(); + } /********************* misc *************************************/ @@ -134,18 +155,23 @@ public final class CellUtil { return new KeyValue(row, family, qualifier, timestamp, KeyValue.Type.codeToType(type), value); } - + public static Cell createCell(final byte[] row, final byte[] family, final byte[] qualifier, final long timestamp, final byte type, final byte[] value, final long memstoreTS) { - // I need a Cell Factory here. Using KeyValue for now. TODO. - // TODO: Make a new Cell implementation that just carries these - // byte arrays. 
KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, KeyValue.Type.codeToType(type), value); keyValue.setMvccVersion(memstoreTS); return keyValue; } + public static Cell createCell(final byte[] row, final byte[] family, final byte[] qualifier, + final long timestamp, final byte type, final byte[] value, byte[] tags, final long memstoreTS) { + KeyValue keyValue = new KeyValue(row, family, qualifier, timestamp, + KeyValue.Type.codeToType(type), value, tags); + keyValue.setMvccVersion(memstoreTS); + return keyValue; + } + /** * @param cellScannerables * @return CellScanner interface over cellIterables diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java index 6bb523382a4..20ae37dfc83 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java @@ -27,9 +27,12 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; +import java.util.Iterator; +import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; @@ -66,7 +69,14 @@ import com.google.common.primitives.Longs; * The rowlength maximum is Short.MAX_SIZE, column family length maximum * is Byte.MAX_SIZE, and column qualifier + key length must be < * Integer.MAX_SIZE. The column does not contain the family/qualifier delimiter, - * {@link #COLUMN_FAMILY_DELIMITER} + * {@link #COLUMN_FAMILY_DELIMITER}
+ * KeyValue can optionally contain Tags. When it contains tags, it is added in the byte array after + * the value part. The format for this part is: <tagslength><tagsbytes>. + * tagslength maximum is Short.MAX_SIZE. The tagsbytes + * contain one or more tags where as each tag is of the form + * <taglength><tagtype><tagbytes>. tagtype is one byte and + * taglength maximum is Short.MAX_SIZE and it includes 1 byte type length + * and actual tag bytes length. */ @InterfaceAudience.Private public class KeyValue implements Cell, HeapSize, Cloneable { @@ -127,6 +137,11 @@ public class KeyValue implements Cell, HeapSize, Cloneable { // Size of the length ints in a KeyValue datastructure. public static final int KEYVALUE_INFRASTRUCTURE_SIZE = ROW_OFFSET; + /** Size of the tags length field in bytes */ + public static final int TAGS_LENGTH_SIZE = Bytes.SIZEOF_SHORT; + + public static final int KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE = ROW_OFFSET + TAGS_LENGTH_SIZE; + /** * Computes the number of bytes that a KeyValue instance with the provided * characteristics would take up for its underlying data structure. @@ -140,8 +155,46 @@ public class KeyValue implements Cell, HeapSize, Cloneable { */ public static long getKeyValueDataStructureSize(int rlength, int flength, int qlength, int vlength) { - return KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + - getKeyDataStructureSize(rlength, flength, qlength) + vlength; + return KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + + getKeyDataStructureSize(rlength, flength, qlength) + vlength; + } + + /** + * Computes the number of bytes that a KeyValue instance with the provided + * characteristics would take up for its underlying data structure. + * + * @param rlength row length + * @param flength family length + * @param qlength qualifier length + * @param vlength value length + * @param tagsLength total length of the tags + * + * @return the KeyValue data structure length + */ + public static long getKeyValueDataStructureSize(int rlength, int flength, int qlength, + int vlength, int tagsLength) { + if (tagsLength == 0) { + return getKeyValueDataStructureSize(rlength, flength, qlength, vlength); + } + return KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + + getKeyDataStructureSize(rlength, flength, qlength) + vlength + tagsLength; + } + + /** + * Computes the number of bytes that a KeyValue instance with the provided + * characteristics would take up for its underlying data structure. + * + * @param klength key length + * @param vlength value length + * @param tagsLength total length of the tags + * + * @return the KeyValue data structure length + */ + public static long getKeyValueDataStructureSize(int klength, int vlength, int tagsLength) { + if (tagsLength == 0) { + return KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + klength + vlength; + } + return KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + klength + vlength + tagsLength; } /** @@ -201,6 +254,38 @@ public class KeyValue implements Cell, HeapSize, Cloneable { } } + /** + * @return an iterator over the tags in this KeyValue. 
+ */ + public Iterator tagsIterator() { + // Subtract -1 to point to the end of the complete tag byte[] + final int endOffset = this.offset + this.length - 1; + return new Iterator() { + private int pos = getTagsOffset(); + + @Override + public boolean hasNext() { + return this.pos < endOffset; + } + + @Override + public Tag next() { + if (hasNext()) { + short curTagLen = Bytes.toShort(bytes, this.pos); + Tag tag = new Tag(bytes, pos, (short) (curTagLen + Bytes.SIZEOF_SHORT)); + this.pos += Bytes.SIZEOF_SHORT + curTagLen; + return tag; + } + return null; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + /** * Lowest possible key. * Makes a Key with highest possible Timestamp, empty row and column. No @@ -365,6 +450,42 @@ public class KeyValue implements Cell, HeapSize, Cloneable { this(row, family, qualifier, timestamp, Type.Put, value); } + /** + * Constructs KeyValue structure filled with specified values. + * @param row row key + * @param family family name + * @param qualifier column qualifier + * @param timestamp version timestamp + * @param value column value + * @param tags tags + * @throws IllegalArgumentException + */ + public KeyValue(final byte[] row, final byte[] family, + final byte[] qualifier, final long timestamp, final byte[] value, + final Tag[] tags) { + this(row, family, qualifier, timestamp, value, Arrays.asList(tags)); + } + + /** + * Constructs KeyValue structure filled with specified values. + * @param row row key + * @param family family name + * @param qualifier column qualifier + * @param timestamp version timestamp + * @param value column value + * @param tags tags non-empty list of tags or null + * @throws IllegalArgumentException + */ + public KeyValue(final byte[] row, final byte[] family, + final byte[] qualifier, final long timestamp, final byte[] value, + final List tags) { + this(row, 0, row==null ? 0 : row.length, + family, 0, family==null ? 0 : family.length, + qualifier, 0, qualifier==null ? 0 : qualifier.length, + timestamp, Type.Put, + value, 0, value==null ? 0 : value.length, tags); + } + /** * Constructs KeyValue structure filled with specified values. * @param row row key @@ -382,6 +503,144 @@ public class KeyValue implements Cell, HeapSize, Cloneable { timestamp, type, value, 0, len(value)); } + /** + * Constructs KeyValue structure filled with specified values. + *

+ * Column is split into two fields, family and qualifier. + * @param row row key + * @param family family name + * @param qualifier column qualifier + * @param timestamp version timestamp + * @param type key type + * @param value column value + * @throws IllegalArgumentException + */ + public KeyValue(final byte[] row, final byte[] family, + final byte[] qualifier, final long timestamp, Type type, + final byte[] value, final List tags) { + this(row, family, qualifier, 0, qualifier==null ? 0 : qualifier.length, + timestamp, type, value, 0, value==null ? 0 : value.length, tags); + } + + /** + * Constructs KeyValue structure filled with specified values. + * @param row row key + * @param family family name + * @param qualifier column qualifier + * @param timestamp version timestamp + * @param type key type + * @param value column value + * @throws IllegalArgumentException + */ + public KeyValue(final byte[] row, final byte[] family, + final byte[] qualifier, final long timestamp, Type type, + final byte[] value, final byte[] tags) { + this(row, family, qualifier, 0, qualifier==null ? 0 : qualifier.length, + timestamp, type, value, 0, value==null ? 0 : value.length, tags); + } + + /** + * Constructs KeyValue structure filled with specified values. + * @param row row key + * @param family family name + * @param qualifier column qualifier + * @param qoffset qualifier offset + * @param qlength qualifier length + * @param timestamp version timestamp + * @param type key type + * @param value column value + * @param voffset value offset + * @param vlength value length + * @throws IllegalArgumentException + */ + public KeyValue(byte [] row, byte [] family, + byte [] qualifier, int qoffset, int qlength, long timestamp, Type type, + byte [] value, int voffset, int vlength, List tags) { + this(row, 0, row==null ? 0 : row.length, + family, 0, family==null ? 0 : family.length, + qualifier, qoffset, qlength, timestamp, type, + value, voffset, vlength, tags); + } + + /** + * @param row + * @param family + * @param qualifier + * @param qoffset + * @param qlength + * @param timestamp + * @param type + * @param value + * @param voffset + * @param vlength + * @param tags + */ + public KeyValue(byte [] row, byte [] family, + byte [] qualifier, int qoffset, int qlength, long timestamp, Type type, + byte [] value, int voffset, int vlength, byte[] tags) { + this(row, 0, row==null ? 0 : row.length, + family, 0, family==null ? 0 : family.length, + qualifier, qoffset, qlength, timestamp, type, + value, voffset, vlength, tags, 0, tags==null ? 0 : tags.length); + } + + /** + * Constructs KeyValue structure filled with specified values. + *

+ * Column is split into two fields, family and qualifier. + * @param row row key + * @throws IllegalArgumentException + */ + public KeyValue(final byte [] row, final int roffset, final int rlength, + final byte [] family, final int foffset, final int flength, + final byte [] qualifier, final int qoffset, final int qlength, + final long timestamp, final Type type, + final byte [] value, final int voffset, final int vlength) { + this(row, roffset, rlength, family, foffset, flength, qualifier, qoffset, + qlength, timestamp, type, value, voffset, vlength, null); + } + + /** + * Constructs KeyValue structure filled with specified values. Uses the provided buffer as the + * data buffer. + *

+ * Column is split into two fields, family and qualifier. + * + * @param buffer the bytes buffer to use + * @param boffset buffer offset + * @param row row key + * @param roffset row offset + * @param rlength row length + * @param family family name + * @param foffset family offset + * @param flength family length + * @param qualifier column qualifier + * @param qoffset qualifier offset + * @param qlength qualifier length + * @param timestamp version timestamp + * @param type key type + * @param value column value + * @param voffset value offset + * @param vlength value length + * @param tags non-empty list of tags or null + * @throws IllegalArgumentException an illegal value was passed or there is insufficient space + * remaining in the buffer + */ + public KeyValue(byte [] buffer, final int boffset, + final byte [] row, final int roffset, final int rlength, + final byte [] family, final int foffset, final int flength, + final byte [] qualifier, final int qoffset, final int qlength, + final long timestamp, final Type type, + final byte [] value, final int voffset, final int vlength, + final Tag[] tags) { + this.bytes = buffer; + this.length = writeByteArray(buffer, boffset, + row, roffset, rlength, + family, foffset, flength, qualifier, qoffset, qlength, + timestamp, type, value, voffset, vlength, tags); + this.offset = boffset; + } + /** * Constructs KeyValue structure filled with specified values. *
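As a quick illustration of the tag-aware constructors added above, the following hedged sketch (not part of the patch; the row/column names, timestamp and tag type are arbitrary, and the generic List<Tag> signature is assumed since angle brackets were stripped from this copy of the diff) builds a tagged KeyValue and reads the tags back through getTags(), which this patch adds further down in the file:

    import java.util.List;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.Tag;
    import org.apache.hadoop.hbase.util.Bytes;

    public class TaggedKeyValueSketch {
      public static void main(String[] args) {
        // One tag; type 1 is an arbitrary example value.
        Tag[] tags = new Tag[] { new Tag((byte) 1, Bytes.toBytes("tag-payload")) };
        // New constructor: KeyValue(row, family, qualifier, timestamp, value, Tag[]).
        // The tags are serialized after the value as <tagslength><tagsbytes>.
        KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
            Bytes.toBytes("q"), 1L, Bytes.toBytes("v"), tags);
        List<Tag> readBack = kv.getTags();
        for (Tag t : readBack) {
          System.out.println(t.getType() + " -> " + Bytes.toString(t.getValue()));
        }
      }
    }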

@@ -400,16 +659,48 @@ public class KeyValue implements Cell, HeapSize, Cloneable { * @param value column value * @param voffset value offset * @param vlength value length + * @param tags tags * @throws IllegalArgumentException */ public KeyValue(final byte [] row, final int roffset, final int rlength, final byte [] family, final int foffset, final int flength, final byte [] qualifier, final int qoffset, final int qlength, final long timestamp, final Type type, - final byte [] value, final int voffset, final int vlength) { + final byte [] value, final int voffset, final int vlength, + final List tags) { this.bytes = createByteArray(row, roffset, rlength, family, foffset, flength, qualifier, qoffset, qlength, - timestamp, type, value, voffset, vlength); + timestamp, type, value, voffset, vlength, tags); + this.length = bytes.length; + this.offset = 0; + } + + /** + * @param row + * @param roffset + * @param rlength + * @param family + * @param foffset + * @param flength + * @param qualifier + * @param qoffset + * @param qlength + * @param timestamp + * @param type + * @param value + * @param voffset + * @param vlength + * @param tags + */ + public KeyValue(final byte [] row, final int roffset, final int rlength, + final byte [] family, final int foffset, final int flength, + final byte [] qualifier, final int qoffset, final int qlength, + final long timestamp, final Type type, + final byte [] value, final int voffset, final int vlength, + final byte[] tags, final int tagsOffset, final int tagsLength) { + this.bytes = createByteArray(row, roffset, rlength, + family, foffset, flength, qualifier, qoffset, qlength, + timestamp, type, value, voffset, vlength, tags, tagsOffset, tagsLength); this.length = bytes.length; this.offset = 0; } @@ -432,9 +723,30 @@ public class KeyValue implements Cell, HeapSize, Cloneable { final int qlength, final long timestamp, final Type type, final int vlength) { - this.bytes = createEmptyByteArray(rlength, - flength, qlength, - timestamp, type, vlength); + this(rlength, flength, qlength, timestamp, type, vlength, 0); + } + + /** + * Constructs an empty KeyValue structure, with specified sizes. + * This can be used to partially fill up KeyValues. + *

+ * Column is split into two fields, family and qualifier. + * @param rlength row length + * @param flength family length + * @param qlength qualifier length + * @param timestamp version timestamp + * @param type key type + * @param vlength value length + * @param tagsLength + * @throws IllegalArgumentException + */ + public KeyValue(final int rlength, + final int flength, + final int qlength, + final long timestamp, final Type type, + final int vlength, final int tagsLength) { + this.bytes = createEmptyByteArray(rlength, flength, qlength, timestamp, type, vlength, + tagsLength); this.length = bytes.length; this.offset = 0; } @@ -459,7 +771,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable { * @return The newly created byte array. */ private static byte[] createEmptyByteArray(final int rlength, int flength, - int qlength, final long timestamp, final Type type, int vlength) { + int qlength, final long timestamp, final Type type, int vlength, int tagsLength) { if (rlength > Short.MAX_VALUE) { throw new IllegalArgumentException("Row > " + Short.MAX_VALUE); } @@ -470,6 +782,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable { if (qlength > Integer.MAX_VALUE - rlength - flength) { throw new IllegalArgumentException("Qualifier > " + Integer.MAX_VALUE); } + checkForTagsLength(tagsLength); // Key length long longkeylength = getKeyDataStructureSize(rlength, flength, qlength); if (longkeylength > Integer.MAX_VALUE) { @@ -484,8 +797,8 @@ public class KeyValue implements Cell, HeapSize, Cloneable { } // Allocate right-sized byte array. - byte [] bytes = - new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength)]; + byte[] bytes= new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength, + tagsLength)]; // Write the correct size markers int pos = 0; pos = Bytes.putInt(bytes, pos, keylength); @@ -496,6 +809,10 @@ public class KeyValue implements Cell, HeapSize, Cloneable { pos += flength + qlength; pos = Bytes.putLong(bytes, pos, timestamp); pos = Bytes.putByte(bytes, pos, type.getCode()); + pos += keylength + vlength; + if (tagsLength > 0) { + pos = Bytes.putShort(bytes, pos, (short)(tagsLength & 0x0000ffff)); + } return bytes; } @@ -518,7 +835,6 @@ public class KeyValue implements Cell, HeapSize, Cloneable { final byte [] qualifier, int qlength, final byte [] value, int vlength) throws IllegalArgumentException { - if (rlength > Short.MAX_VALUE) { throw new IllegalArgumentException("Row > " + Short.MAX_VALUE); } @@ -579,12 +895,21 @@ public class KeyValue implements Cell, HeapSize, Cloneable { final byte [] family, final int foffset, int flength, final byte [] qualifier, final int qoffset, int qlength, final long timestamp, final Type type, - final byte [] value, final int voffset, int vlength) { + final byte [] value, final int voffset, int vlength, Tag[] tags) { checkParameters(row, rlength, family, flength, qualifier, qlength, value, vlength); + // Calculate length of tags area + int tagsLength = 0; + if (tags != null && tags.length > 0) { + for (Tag t: tags) { + tagsLength += t.getLength(); + } + } + checkForTagsLength(tagsLength); int keyLength = (int) getKeyDataStructureSize(rlength, flength, qlength); - int keyValueLength = (int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength); + int keyValueLength = (int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength, + tagsLength); if (keyValueLength > buffer.length - boffset) { throw new IllegalArgumentException("Buffer size " + (buffer.length - boffset) 
+ " < " + keyValueLength); @@ -608,13 +933,24 @@ public class KeyValue implements Cell, HeapSize, Cloneable { if (value != null && value.length > 0) { pos = Bytes.putBytes(buffer, pos, value, voffset, vlength); } - + // Write the number of tags. If it is 0 then it means there are no tags. + if (tagsLength > 0) { + pos = Bytes.putShort(buffer, pos, (short) tagsLength); + for (Tag t : tags) { + pos = Bytes.putBytes(buffer, pos, t.getBuffer(), t.getOffset(), t.getLength()); + } + } return keyValueLength; } + private static void checkForTagsLength(int tagsLength) { + if (tagsLength > Short.MAX_VALUE) { + throw new IllegalArgumentException("tagslength "+ tagsLength + " > " + Short.MAX_VALUE); + } + } + /** * Write KeyValue format into a byte array. - * * @param row row key * @param roffset row offset * @param rlength row length @@ -635,14 +971,15 @@ public class KeyValue implements Cell, HeapSize, Cloneable { final int rlength, final byte [] family, final int foffset, int flength, final byte [] qualifier, final int qoffset, int qlength, final long timestamp, final Type type, - final byte [] value, final int voffset, int vlength) { + final byte [] value, final int voffset, + int vlength, byte[] tags, int tagsOffset, int tagsLength) { checkParameters(row, rlength, family, flength, qualifier, qlength, value, vlength); - + checkForTagsLength(tagsLength); // Allocate right-sized byte array. int keyLength = (int) getKeyDataStructureSize(rlength, flength, qlength); byte [] bytes = - new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength)]; + new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength, tagsLength)]; // Write key, value and key row length. int pos = 0; pos = Bytes.putInt(bytes, pos, keyLength); @@ -661,8 +998,64 @@ public class KeyValue implements Cell, HeapSize, Cloneable { if (value != null && value.length > 0) { pos = Bytes.putBytes(bytes, pos, value, voffset, vlength); } + // Add the tags after the value part + if (tagsLength > 0) { + pos = Bytes.putShort(bytes, pos, (short) (tagsLength)); + pos = Bytes.putBytes(bytes, pos, tags, tagsOffset, tagsLength); + } return bytes; } + + private static byte [] createByteArray(final byte [] row, final int roffset, + final int rlength, final byte [] family, final int foffset, int flength, + final byte [] qualifier, final int qoffset, int qlength, + final long timestamp, final Type type, + final byte [] value, final int voffset, int vlength, List tags) { + + checkParameters(row, rlength, family, flength, qualifier, qlength, value, vlength); + + // Calculate length of tags area + int tagsLength = 0; + if (tags != null && !tags.isEmpty()) { + for (Tag t : tags) { + tagsLength += t.getLength(); + } + } + checkForTagsLength(tagsLength); + // Allocate right-sized byte array. + int keyLength = (int) getKeyDataStructureSize(rlength, flength, qlength); + byte[] bytes = new byte[(int) getKeyValueDataStructureSize(rlength, flength, qlength, vlength, + tagsLength)]; + + // Write key, value and key row length. 
+ int pos = 0; + pos = Bytes.putInt(bytes, pos, keyLength); + + pos = Bytes.putInt(bytes, pos, vlength); + pos = Bytes.putShort(bytes, pos, (short)(rlength & 0x0000ffff)); + pos = Bytes.putBytes(bytes, pos, row, roffset, rlength); + pos = Bytes.putByte(bytes, pos, (byte)(flength & 0x0000ff)); + if(flength != 0) { + pos = Bytes.putBytes(bytes, pos, family, foffset, flength); + } + if(qlength != 0) { + pos = Bytes.putBytes(bytes, pos, qualifier, qoffset, qlength); + } + pos = Bytes.putLong(bytes, pos, timestamp); + pos = Bytes.putByte(bytes, pos, type.getCode()); + if (value != null && value.length > 0) { + pos = Bytes.putBytes(bytes, pos, value, voffset, vlength); + } + // Add the tags after the value part + if (tagsLength > 0) { + pos = Bytes.putShort(bytes, pos, (short) (tagsLength)); + for (Tag t : tags) { + pos = Bytes.putBytes(bytes, pos, t.getBuffer(), t.getOffset(), t.getLength()); + } + } + return bytes; + } + /** * Needed doing 'contains' on List. Only compares the key portion, not the value. @@ -743,13 +1136,6 @@ public class KeyValue implements Cell, HeapSize, Cloneable { return keyToString(k, 0, k.length); } - /** - * Use for logging. - * @param b Key portion of a KeyValue. - * @param o Offset to start of key - * @param l Length of key. - * @return Key as a String. - */ /** * Produces a string map for this key/value pair. Useful for programmatic use * and manipulation of the data stored in an HLogKey, for example, printing @@ -765,9 +1151,24 @@ public class KeyValue implements Cell, HeapSize, Cloneable { stringMap.put("qualifier", Bytes.toStringBinary(getQualifier())); stringMap.put("timestamp", getTimestamp()); stringMap.put("vlen", getValueLength()); + List tags = getTags(); + if (tags != null) { + List tagsString = new ArrayList(); + for (Tag t : tags) { + tagsString.add((t.getType()) + ":" +Bytes.toStringBinary(t.getValue())); + } + stringMap.put("tag", tagsString); + } return stringMap; } + /** + * Use for logging. + * @param b Key portion of a KeyValue. + * @param o Offset to start of key + * @param l Length of key. + * @return Key as a String. + */ public static String keyToString(final byte [] b, final int o, final int l) { if (b == null) return ""; int rowlength = Bytes.toShort(b, o); @@ -839,9 +1240,9 @@ public class KeyValue implements Cell, HeapSize, Cloneable { * @return length of entire KeyValue, in bytes */ private static int getLength(byte [] bytes, int offset) { - return ROW_OFFSET + - Bytes.toInt(bytes, offset) + - Bytes.toInt(bytes, offset + Bytes.SIZEOF_INT); + int klength = ROW_OFFSET + Bytes.toInt(bytes, offset); + int vlength = Bytes.toInt(bytes, offset + Bytes.SIZEOF_INT); + return klength + vlength; } /** @@ -876,11 +1277,12 @@ public class KeyValue implements Cell, HeapSize, Cloneable { } /** - * @return Value offset + * @return the value offset */ @Override public int getValueOffset() { - return getKeyOffset() + getKeyLength(); + int voffset = getKeyOffset() + getKeyLength(); + return voffset; } /** @@ -888,7 +1290,8 @@ public class KeyValue implements Cell, HeapSize, Cloneable { */ @Override public int getValueLength() { - return Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT); + int vlength = Bytes.toInt(this.bytes, this.offset + Bytes.SIZEOF_INT); + return vlength; } /** @@ -1185,6 +1588,55 @@ public class KeyValue implements Cell, HeapSize, Cloneable { return CellUtil.cloneQualifier(this); } + /** + * This returns the offset where the tag actually starts. 
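To make the offset arithmetic concrete, a worked example with illustrative sizes: for a KeyValue whose key portion is 30 bytes, whose value is 5 bytes and which carries a single serialized tag entity of 9 bytes, the backing array holds 4 (key length int) + 4 (value length int) + 30 (key) + 5 (value) + 2 (tags length short) + 9 (tag bytes) = 54 bytes. getTagsLength() computes 54 - (30 + 5 + 8) = 11 and, because tags are present, subtracts the 2-byte TAGS_LENGTH_SIZE to return 9; getTagsOffset() then returns offset + 54 - 9, i.e. the first byte after the tags-length short.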
+ */ + @Override + public int getTagsOffset() { + short tagsLen = getTagsLength(); + if (tagsLen == 0) { + return this.offset + this.length; + } + return this.offset + this.length - tagsLen; + } + + /** + * This returns the total length of the tag bytes + */ + @Override + public short getTagsLength() { + int tagsLen = this.length - (getKeyLength() + getValueLength() + KEYVALUE_INFRASTRUCTURE_SIZE); + if (tagsLen > 0) { + // There are some Tag bytes in the byte[]. So reduce 2 bytes which is added to denote the tags + // length + tagsLen -= TAGS_LENGTH_SIZE; + } + return (short) tagsLen; + } + + /** + * This method may not be right. But we cannot use the CellUtil.getTagIterator because we don't know + * getKeyOffset and getKeyLength + * Cannnot use the getKeyOffset and getKeyLength in CellUtil as they are not part of the Cell interface. + * Returns any tags embedded in the KeyValue. + * @return The tags + */ + public List getTags() { + short tagsLength = getTagsLength(); + if (tagsLength == 0) { + return new ArrayList(); + } + return Tag.createTags(getBuffer(), getTagsOffset(), tagsLength); + } + + /** + * @return the backing array of the entire KeyValue (all KeyValue fields are in a single array) + */ + @Override + public byte[] getTagsArray() { + return bytes; + } + //--------------------------------------------------------------------------- // // Compare specified fields against those contained in this KeyValue @@ -2169,7 +2621,7 @@ public class KeyValue implements Cell, HeapSize, Cloneable { int len = writeByteArray(buffer, boffset, row, roffset, rlength, family, foffset, flength, qualifier, qoffset, qlength, HConstants.LATEST_TIMESTAMP, KeyValue.Type.Maximum, - null, 0, 0); + null, 0, 0, null); return new KeyValue(buffer, boffset, len); } @@ -2424,22 +2876,4 @@ public class KeyValue implements Cell, HeapSize, Cloneable { sum += Bytes.SIZEOF_LONG;// memstoreTS return ClassSize.align(sum); } - - // ----- - // KV tags stubs - @Override - public int getTagsOffset() { - throw new UnsupportedOperationException("Not implememnted"); - } - - @Override - public short getTagsLength() { - throw new UnsupportedOperationException("Not implememnted"); - } - - @Override - public byte[] getTagsArray() { - throw new UnsupportedOperationException("Not implememnted"); - } - } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTestUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTestUtil.java index 036cf70bde8..6c9fa71ae9b 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTestUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueTestUtil.java @@ -93,12 +93,12 @@ public class KeyValueTestUtil { } public static List rewindThenToList(final ByteBuffer bb, - final boolean includesMemstoreTS) { + final boolean includesMemstoreTS, final boolean useTags) { bb.rewind(); List kvs = Lists.newArrayList(); KeyValue kv = null; while (true) { - kv = KeyValueUtil.nextShallowCopy(bb, includesMemstoreTS); + kv = KeyValueUtil.nextShallowCopy(bb, includesMemstoreTS, useTags); if (kv == null) { break; } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java index 90aef087320..aa56f743558 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java @@ -24,9 +24,9 @@ import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; import 
org.apache.hadoop.hbase.util.ByteBufferUtils; -import org.apache.hadoop.hbase.util.SimpleByteRange; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IterableUtils; +import org.apache.hadoop.hbase.util.SimpleByteRange; import org.apache.hadoop.io.WritableUtils; import com.google.common.base.Function; @@ -41,8 +41,9 @@ public class KeyValueUtil { /**************** length *********************/ public static int length(final Cell cell) { - return (int)KeyValue.getKeyValueDataStructureSize(cell.getRowLength(), cell.getFamilyLength(), - cell.getQualifierLength(), cell.getValueLength()); + return (int) (KeyValue.getKeyValueDataStructureSize(cell.getRowLength(), + cell.getFamilyLength(), cell.getQualifierLength(), cell.getValueLength(), + cell.getTagsLength())); } protected static int keyLength(final Cell cell) { @@ -71,7 +72,8 @@ public class KeyValueUtil { /**************** copy key only *********************/ public static KeyValue copyToNewKeyValue(final Cell cell) { - KeyValue kvCell = new KeyValue(copyToNewByteArray(cell)); + byte[] bytes = copyToNewByteArray(cell); + KeyValue kvCell = new KeyValue(bytes, 0, bytes.length); kvCell.setMvccVersion(cell.getMvccVersion()); return kvCell; } @@ -112,8 +114,12 @@ public class KeyValueUtil { pos = Bytes.putInt(output, pos, keyLength(cell)); pos = Bytes.putInt(output, pos, cell.getValueLength()); pos = appendKeyToByteArrayWithoutValue(cell, output, pos); - CellUtil.copyValueTo(cell, output, pos); - return pos + cell.getValueLength(); + pos = CellUtil.copyValueTo(cell, output, pos); + if ((cell.getTagsLength() > 0)) { + pos = Bytes.putShort(output, pos, cell.getTagsLength()); + pos = CellUtil.copyTagTo(cell, output, pos); + } + return pos; } public static ByteBuffer copyToNewByteBuffer(final Cell cell) { @@ -142,20 +148,30 @@ public class KeyValueUtil { /** * Creates a new KeyValue object positioned in the supplied ByteBuffer and sets the ByteBuffer's * position to the start of the next KeyValue. Does not allocate a new array or copy data. 
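A hedged sketch (not from the patch) of how the new includesTags flag below could be exercised against a tagged KeyValue; it assumes imports of KeyValue, KeyValueUtil, Tag, Bytes and java.nio.ByteBuffer, and arbitrary example values:

    // Serialize a tagged KeyValue and shallow-copy it back out of a heap buffer.
    KeyValue kv = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("q"),
        1L, Bytes.toBytes("v"), new Tag[] { new Tag((byte) 1, Bytes.toBytes("t")) });
    ByteBuffer bb = ByteBuffer.wrap(kv.getBuffer(), kv.getOffset(), kv.getLength());
    // With includesTags=true the parser reads the 2-byte tags length that follows the
    // value and counts the tag bytes into the shallow-copied KeyValue's length.
    KeyValue copy = KeyValueUtil.nextShallowCopy(bb, false, true);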
+ * @param bb + * @param includesMvccVersion + * @param includesTags */ - public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion) { + public static KeyValue nextShallowCopy(final ByteBuffer bb, final boolean includesMvccVersion, + boolean includesTags) { if (bb.isDirect()) { throw new IllegalArgumentException("only supports heap buffers"); } if (bb.remaining() < 1) { return null; } + KeyValue keyValue = null; int underlyingArrayOffset = bb.arrayOffset() + bb.position(); int keyLength = bb.getInt(); int valueLength = bb.getInt(); - int kvLength = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keyLength + valueLength; - KeyValue keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); ByteBufferUtils.skip(bb, keyLength + valueLength); + short tagsLength = 0; + if (includesTags) { + tagsLength = bb.getShort(); + ByteBufferUtils.skip(bb, tagsLength); + } + int kvLength = (int) KeyValue.getKeyValueDataStructureSize(keyLength, valueLength, tagsLength); + keyValue = new KeyValue(bb.array(), underlyingArrayOffset, kvLength); if (includesMvccVersion) { long mvccVersion = ByteBufferUtils.readVLong(bb); keyValue.setMvccVersion(mvccVersion); diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/Tag.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/Tag.java new file mode 100644 index 00000000000..03a9e817c4e --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/Tag.java @@ -0,0 +1,174 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * <taglength><tagtype><tagbytes>. tagtype is + * one byte and taglength maximum is Short.MAX_SIZE. + * It includes 1 byte type length and actual tag bytes length. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public class Tag { + public final static int TYPE_LENGTH_SIZE = Bytes.SIZEOF_BYTE; + public final static int TAG_LENGTH_SIZE = Bytes.SIZEOF_SHORT; + public final static int INFRASTRUCTURE_SIZE = TYPE_LENGTH_SIZE + TAG_LENGTH_SIZE; + + private byte type; + private byte[] bytes; + private int offset = 0; + private short length = 0; + + // The special tag will write the length of each tag and that will be + // followed by the type and then the actual tag. + // So every time the length part is parsed we need to add + 1 byte to it to + // get the type and then get the actual tag. 
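The serialized form described in the comment above can be pictured with a small hedged example (the payload "abc" and type 1 are arbitrary):

    // new Tag((byte) 1, Bytes.toBytes("abc")) lays out, in the backing array:
    //   [short tagLength = 4][byte tagType = 1]['a']['b']['c']
    // tagLength covers the 1-byte type plus the 3 payload bytes, so the whole
    // serialized entity is TAG_LENGTH_SIZE (2) + 4 = 6 bytes.
    Tag t = new Tag((byte) 1, Bytes.toBytes("abc"));
    assert t.getType() == 1;
    assert t.getTagLength() == 3;            // payload bytes only
    byte[] payload = t.getValue();           // copies the 3 payload bytes out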
+ public Tag(byte tagType, String tag) { + this(tagType, Bytes.toBytes(tag)); + } + + /** + * @param tagType + * @param tag + */ + public Tag(byte tagType, byte[] tag) { + // + short tagLength = (short) ((tag.length & 0x0000ffff) + TYPE_LENGTH_SIZE); + length = (short) (TAG_LENGTH_SIZE + tagLength); + bytes = new byte[length]; + int pos = Bytes.putShort(bytes, 0, tagLength); + pos = Bytes.putByte(bytes, pos, tagType); + Bytes.putBytes(bytes, pos, tag, 0, tag.length); + this.type = tagType; + } + + /** + * Creates a Tag from the specified byte array and offset. Presumes + * bytes content starting at offset is formatted as + * a Tag blob. + * The bytes to include the tag type, tag length and actual tag bytes. + * @param bytes + * byte array + * @param offset + * offset to start of Tag + */ + public Tag(byte[] bytes, int offset) { + this(bytes, offset, getLength(bytes, offset)); + } + + private static short getLength(byte[] bytes, int offset) { + return (short) (TAG_LENGTH_SIZE + Bytes.toShort(bytes, offset)); + } + + /** + * Creates a Tag from the specified byte array, starting at offset, and for + * length length. Presumes bytes content starting at + * offset is formatted as a Tag blob. + * @param bytes + * byte array + * @param offset + * offset to start of the Tag + * @param length + * length of the Tag + */ + public Tag(byte[] bytes, int offset, short length) { + this.bytes = bytes; + this.offset = offset; + this.length = length; + this.type = bytes[offset + TAG_LENGTH_SIZE]; + } + + /** + * @return The byte array backing this Tag. + */ + public byte[] getBuffer() { + return this.bytes; + } + + /** + * @return the tag type + */ + public byte getType() { + return this.type; + } + + /** + * @return Length of actual tag bytes within the backed buffer + */ + public int getTagLength() { + return this.length - INFRASTRUCTURE_SIZE; + } + + /** + * @return Offset of actual tag bytes within the backed buffer + */ + public int getTagOffset() { + return this.offset + INFRASTRUCTURE_SIZE; + } + + public byte[] getValue() { + int tagLength = getTagLength(); + byte[] tag = new byte[tagLength]; + Bytes.putBytes(tag, 0, bytes, getTagOffset(), tagLength); + return tag; + } + + /** + * Creates the list of tags from the byte array b. 
Expected that b is in the + * expected tag format + * @param b + * @param offset + * @param length + * @return List of tags + */ + public static List createTags(byte[] b, int offset, short length) { + List tags = new ArrayList(); + int pos = offset; + while (pos < offset + length) { + short tagLen = Bytes.toShort(b, pos); + tags.add(new Tag(b, pos, (short) (tagLen + TAG_LENGTH_SIZE))); + pos += TAG_LENGTH_SIZE + tagLen; + } + return tags; + } + + /** + * Returns the total length of the entire tag entity + * @return + */ + short getLength() { + return this.length; + } + + /** + * Returns the offset of the entire tag entity + * @return + */ + int getOffset() { + return this.offset; + } +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/codec/CellCodec.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/codec/CellCodec.java index af8db090929..3d9164c19b0 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/codec/CellCodec.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/codec/CellCodec.java @@ -53,6 +53,8 @@ public class CellCodec implements Codec { this.out.write(cell.getTypeByte()); // Value write(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); + // Write tags + write(cell.getTagsArray(), cell.getTagsOffset(), cell.getTagsLength()); // MvccVersion this.out.write(Bytes.toBytes(cell.getMvccVersion())); } @@ -85,11 +87,12 @@ public class CellCodec implements Codec { long timestamp = Bytes.toLong(longArray); byte type = (byte) this.in.read(); byte [] value = readByteArray(in); + byte[] tags = readByteArray(in); // Read memstore version byte[] memstoreTSArray = new byte[Bytes.SIZEOF_LONG]; IOUtils.readFully(this.in, memstoreTSArray); long memstoreTS = Bytes.toLong(memstoreTSArray); - return CellUtil.createCell(row, family, qualifier, timestamp, type, value, memstoreTS); + return CellUtil.createCell(row, family, qualifier, timestamp, type, value, tags, memstoreTS); } /** diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java index 3eee7693c19..c2c4c4b8e7c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java @@ -26,8 +26,8 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.KeyValue.SamePrefixComparator; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.WritableUtils; @@ -42,8 +42,15 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { @Override public ByteBuffer decodeKeyValues(DataInputStream source, - boolean includesMemstoreTS) throws IOException { - return decodeKeyValues(source, 0, 0, includesMemstoreTS); + HFileBlockDecodingContext blkDecodingCtx) throws IOException { + if (blkDecodingCtx.getClass() != HFileBlockDefaultDecodingContext.class) { + throw new IOException(this.getClass().getName() + " only accepts " + + HFileBlockDefaultDecodingContext.class.getName() + " as the decoding context."); + } + + 
HFileBlockDefaultDecodingContext decodingCtx = + (HFileBlockDefaultDecodingContext) blkDecodingCtx; + return internalDecodeKeyValues(source, 0, 0, decodingCtx); } protected static class SeekerState { @@ -51,6 +58,8 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { protected int keyLength; protected int valueLength; protected int lastCommonPrefix; + protected int tagLength = 0; + protected int tagOffset = -1; /** We need to store a copy of the key. */ protected byte[] keyBuffer = new byte[INITIAL_KEY_BUFFER_SIZE]; @@ -112,21 +121,30 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { protected abstract static class BufferedEncodedSeeker implements EncodedSeeker { - + protected HFileBlockDecodingContext decodingCtx; protected final KVComparator comparator; protected final SamePrefixComparator samePrefixComparator; protected ByteBuffer currentBuffer; protected STATE current = createSeekerState(); // always valid protected STATE previous = createSeekerState(); // may not be valid - @SuppressWarnings("unchecked") - public BufferedEncodedSeeker(KVComparator comparator) { + public BufferedEncodedSeeker(KVComparator comparator, + HFileBlockDecodingContext decodingCtx) { this.comparator = comparator; if (comparator instanceof SamePrefixComparator) { this.samePrefixComparator = (SamePrefixComparator) comparator; } else { this.samePrefixComparator = null; } + this.decodingCtx = decodingCtx; + } + + protected boolean includesMvcc() { + return this.decodingCtx.getHFileContext().shouldIncludeMvcc(); + } + + protected boolean includesTags() { + return this.decodingCtx.getHFileContext().shouldIncludeTags(); } @Override @@ -152,21 +170,33 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { @Override public ByteBuffer getKeyValueBuffer() { - ByteBuffer kvBuffer = ByteBuffer.allocate( - 2 * Bytes.SIZEOF_INT + current.keyLength + current.valueLength); + ByteBuffer kvBuffer = createKVBuffer(); kvBuffer.putInt(current.keyLength); kvBuffer.putInt(current.valueLength); kvBuffer.put(current.keyBuffer, 0, current.keyLength); kvBuffer.put(currentBuffer.array(), currentBuffer.arrayOffset() + current.valueOffset, current.valueLength); + if (current.tagLength > 0) { + kvBuffer.putShort((short) current.tagLength); + kvBuffer.put(currentBuffer.array(), currentBuffer.arrayOffset() + current.tagOffset, + current.tagLength); + } + return kvBuffer; + } + + protected ByteBuffer createKVBuffer() { + int kvBufSize = (int) KeyValue.getKeyValueDataStructureSize(current.keyLength, + current.valueLength, current.tagLength); + ByteBuffer kvBuffer = ByteBuffer.allocate(kvBufSize); return kvBuffer; } @Override public KeyValue getKeyValue() { ByteBuffer kvBuf = getKeyValueBuffer(); - KeyValue kv = new KeyValue(kvBuf.array(), kvBuf.arrayOffset()); + KeyValue kv = new KeyValue(kvBuf.array(), kvBuf.arrayOffset(), kvBuf.array().length + - kvBuf.arrayOffset()); kv.setMvccVersion(current.memstoreTS); return kv; } @@ -188,6 +218,12 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { return true; } + public void decodeTags() { + current.tagLength = ByteBufferUtils.readCompressedInt(currentBuffer); + current.tagOffset = currentBuffer.position(); + ByteBufferUtils.skip(currentBuffer, current.tagLength); + } + @Override public int seekToKeyInBlock(byte[] key, int offset, int length, boolean seekBefore) { @@ -276,8 +312,13 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { } protected final void afterEncodingKeyValue(ByteBuffer in, - 
DataOutputStream out, boolean includesMemstoreTS) { - if (includesMemstoreTS) { + DataOutputStream out, HFileBlockDefaultEncodingContext encodingCtx) throws IOException { + if (encodingCtx.getHFileContext().shouldIncludeTags()) { + int tagsLength = in.getShort(); + ByteBufferUtils.putCompressedInt(out, tagsLength); + ByteBufferUtils.moveBufferToStream(out, in, tagsLength); + } + if (encodingCtx.getHFileContext().shouldIncludeMvcc()) { // Copy memstore timestamp from the byte buffer to the output stream. long memstoreTS = -1; try { @@ -291,8 +332,13 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { } protected final void afterDecodingKeyValue(DataInputStream source, - ByteBuffer dest, boolean includesMemstoreTS) { - if (includesMemstoreTS) { + ByteBuffer dest, HFileBlockDefaultDecodingContext decodingCtx) throws IOException { + if (decodingCtx.getHFileContext().shouldIncludeTags()) { + int tagsLength = ByteBufferUtils.readCompressedInt(source); + dest.putShort((short)tagsLength); + ByteBufferUtils.copyFromStreamToBuffer(dest, source, tagsLength); + } + if (decodingCtx.getHFileContext().shouldIncludeMvcc()) { long memstoreTS = -1; try { // Copy memstore timestamp from the data input stream to the byte @@ -307,33 +353,32 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { } @Override - public HFileBlockEncodingContext newDataBlockEncodingContext( - Algorithm compressionAlgorithm, - DataBlockEncoding encoding, byte[] header) { - return new HFileBlockDefaultEncodingContext( - compressionAlgorithm, encoding, header); + public HFileBlockEncodingContext newDataBlockEncodingContext(DataBlockEncoding encoding, + byte[] header, HFileContext meta) { + return new HFileBlockDefaultEncodingContext(encoding, header, meta); } @Override - public HFileBlockDecodingContext newDataBlockDecodingContext( - Algorithm compressionAlgorithm) { - return new HFileBlockDefaultDecodingContext(compressionAlgorithm); + public HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta) { + return new HFileBlockDefaultDecodingContext(meta); } /** * Compress KeyValues and write them to output buffer. * @param out Where to write compressed data. * @param in Source of KeyValue for compression. - * @param includesMemstoreTS true if including memstore timestamp after every - * key-value pair + * @param encodingCtx use the Encoding ctx associated with the current block * @throws IOException If there is an error writing to output stream. 
*/ public abstract void internalEncodeKeyValues(DataOutputStream out, - ByteBuffer in, boolean includesMemstoreTS) throws IOException; + ByteBuffer in, HFileBlockDefaultEncodingContext encodingCtx) throws IOException; + + public abstract ByteBuffer internalDecodeKeyValues(DataInputStream source, + int allocateHeaderLength, int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) + throws IOException; @Override public void encodeKeyValues(ByteBuffer in, - boolean includesMemstoreTS, HFileBlockEncodingContext blkEncodingCtx) throws IOException { if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) { throw new IOException (this.getClass().getName() + " only accepts " @@ -347,7 +392,7 @@ abstract class BufferedDataBlockEncoder implements DataBlockEncoder { DataOutputStream dataOut = ((HFileBlockDefaultEncodingContext) encodingCtx) .getOutputStreamForEncoder(); - internalEncodeKeyValues(dataOut, in, includesMemstoreTS); + internalEncodeKeyValues(dataOut, in, encodingCtx); if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) { encodingCtx.postEncoding(BlockType.ENCODED_DATA); } else { diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java index bc13465e992..0107512813e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/CopyKeyDataBlockEncoder.java @@ -34,24 +34,12 @@ import org.apache.hadoop.hbase.util.Bytes; public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder { @Override public void internalEncodeKeyValues(DataOutputStream out, - ByteBuffer in, boolean includesMemstoreTS) throws IOException { + ByteBuffer in, HFileBlockDefaultEncodingContext encodingCtx) throws IOException { in.rewind(); ByteBufferUtils.putInt(out, in.limit()); ByteBufferUtils.moveBufferToStream(out, in, in.limit()); } - @Override - public ByteBuffer decodeKeyValues(DataInputStream source, - int preserveHeaderLength, int skipLastBytes, boolean includesMemstoreTS) - throws IOException { - int decompressedSize = source.readInt(); - ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + - preserveHeaderLength); - buffer.position(preserveHeaderLength); - ByteBufferUtils.copyFromStreamToBuffer(buffer, source, decompressedSize); - - return buffer; - } @Override public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { @@ -68,8 +56,8 @@ public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder { @Override public EncodedSeeker createSeeker(KVComparator comparator, - final boolean includesMemstoreTS) { - return new BufferedEncodedSeeker(comparator) { + final HFileBlockDecodingContext decodingCtx) { + return new BufferedEncodedSeeker(comparator, decodingCtx) { @Override protected void decodeNext() { current.keyLength = currentBuffer.getInt(); @@ -78,7 +66,11 @@ public class CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder { currentBuffer.get(current.keyBuffer, 0, current.keyLength); current.valueOffset = currentBuffer.position(); ByteBufferUtils.skip(currentBuffer, current.valueLength); - if (includesMemstoreTS) { + if (includesTags()) { + current.tagLength = currentBuffer.getShort(); + ByteBufferUtils.skip(currentBuffer, current.tagLength); + } + if (includesMvcc()) { current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); } else { current.memstoreTS = 0; @@ -95,4 +87,16 @@ public class 
CopyKeyDataBlockEncoder extends BufferedDataBlockEncoder { }; } + @Override + public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength, + int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException { + int decompressedSize = source.readInt(); + ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + + allocateHeaderLength); + buffer.position(allocateHeaderLength); + ByteBufferUtils.copyFromStreamToBuffer(buffer, source, decompressedSize); + + return buffer; + } + } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java index 3f7df0db1f7..df77a45beca 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DataBlockEncoder.java @@ -23,8 +23,7 @@ import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; -import org.apache.hadoop.io.RawComparator; +import org.apache.hadoop.hbase.io.hfile.HFileContext; /** * Encoding of KeyValue. It aims to be fast and efficient using assumptions: @@ -38,7 +37,7 @@ import org.apache.hadoop.io.RawComparator; * * After encoding, it also optionally compresses the encoded data if a * compression algorithm is specified in HFileBlockEncodingContext argument of - * {@link #encodeKeyValues(ByteBuffer, boolean, HFileBlockEncodingContext)}. + * {@link #encodeKeyValues(ByteBuffer, HFileBlockEncodingContext)}. */ @InterfaceAudience.Private public interface DataBlockEncoder { @@ -49,44 +48,23 @@ public interface DataBlockEncoder { * * @param in * Source of KeyValue for compression. - * @param includesMemstoreTS - * true if including memstore timestamp after every key-value pair - * @param encodingContext + * @param encodingCtx * the encoding context which will contain encoded uncompressed bytes * as well as compressed encoded bytes if compression is enabled, and * also it will reuse resources across multiple calls. * @throws IOException * If there is an error writing to output stream. */ - void encodeKeyValues( - ByteBuffer in, boolean includesMemstoreTS, HFileBlockEncodingContext encodingContext - ) throws IOException; + void encodeKeyValues(ByteBuffer in, HFileBlockEncodingContext encodingCtx) throws IOException; /** * Decode. * @param source Compressed stream of KeyValues. - * @param includesMemstoreTS true if including memstore timestamp after every - * key-value pair + * @param decodingCtx * @return Uncompressed block of KeyValues. * @throws IOException If there is an error in source. */ - ByteBuffer decodeKeyValues( - DataInputStream source, boolean includesMemstoreTS - ) throws IOException; - - /** - * Uncompress. - * @param source encoded stream of KeyValues. - * @param allocateHeaderLength allocate this many bytes for the header. - * @param skipLastBytes Do not copy n last bytes. - * @param includesMemstoreTS true if including memstore timestamp after every - * key-value pair - * @return Uncompressed block of KeyValues. - * @throws IOException If there is an error in source. 
- */ - ByteBuffer decodeKeyValues( - DataInputStream source, int allocateHeaderLength, int skipLastBytes, boolean includesMemstoreTS - ) + ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx) throws IOException; /** @@ -102,42 +80,36 @@ public interface DataBlockEncoder { /** * Create a HFileBlock seeker which find KeyValues within a block. * @param comparator what kind of comparison should be used - * @param includesMemstoreTS true if including memstore timestamp after every - * key-value pair + * @param decodingCtx * @return A newly created seeker. */ - EncodedSeeker createSeeker( - KVComparator comparator, boolean includesMemstoreTS - ); + EncodedSeeker createSeeker(KVComparator comparator, + HFileBlockDecodingContext decodingCtx); /** * Creates a encoder specific encoding context * - * @param compressionAlgorithm - * compression algorithm used if the final data needs to be - * compressed * @param encoding * encoding strategy used * @param headerBytes * header bytes to be written, put a dummy header here if the header * is unknown + * @param meta + * HFile meta data * @return a newly created encoding context */ HFileBlockEncodingContext newDataBlockEncodingContext( - Algorithm compressionAlgorithm, DataBlockEncoding encoding, byte[] headerBytes - ); + DataBlockEncoding encoding, byte[] headerBytes, HFileContext meta); /** * Creates an encoder specific decoding context, which will prepare the data * before actual decoding * - * @param compressionAlgorithm - * compression algorithm used if the data needs to be decompressed + * @param meta + * HFile meta data * @return a newly created decoding context */ - HFileBlockDecodingContext newDataBlockDecodingContext( - Algorithm compressionAlgorithm - ); + HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta); /** * An interface which enable to seek while underlying data is encoded. 
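Below is a minimal usage sketch of the refactored DataBlockEncoder API above, in which the per-call includesMemstoreTS boolean is replaced by encoding/decoding contexts derived from an HFileContext. It is not code from this patch: the choice of PrefixKeyDeltaEncoder, the rawKVs/encodedSource inputs and the HFileContext settings are illustrative assumptions.

import java.io.DataInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.encoding.PrefixKeyDeltaEncoder;
import org.apache.hadoop.hbase.io.hfile.HFileContext;

public class DataBlockEncoderUsageSketch {

  // Encodes a buffer of serialized KeyValues and decodes an encoded stream,
  // with tag and mvcc handling driven by the shared HFileContext instead of
  // the old boolean argument.
  public static ByteBuffer roundTrip(ByteBuffer rawKVs, DataInputStream encodedSource)
      throws IOException {
    HFileContext meta = new HFileContext();
    meta.setIncludesMvcc(true);                       // replaces includesMemstoreTS
    meta.setIncludesTags(true);                       // tags follow each key/value pair
    meta.setCompressAlgo(Compression.Algorithm.NONE);

    DataBlockEncoder encoder = new PrefixKeyDeltaEncoder();

    HFileBlockEncodingContext encodingCtx = encoder.newDataBlockEncodingContext(
        DataBlockEncoding.PREFIX, HConstants.HFILEBLOCK_DUMMY_HEADER, meta);
    encoder.encodeKeyValues(rawKVs, encodingCtx);

    HFileBlockDecodingContext decodingCtx = encoder.newDataBlockDecodingContext(meta);
    // Seekers built from the same context pick up includesTags()/includesMvcc().
    DataBlockEncoder.EncodedSeeker seeker =
        encoder.createSeeker(KeyValue.COMPARATOR, decodingCtx);
    return encoder.decodeKeyValues(encodedSource, decodingCtx);
  }
}

The seeker is created here only to show the new createSeeker signature; positioning it on an encoded block works as it did with the old API.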
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java index e628d58c98f..144501140c4 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/DiffKeyDeltaEncoder.java @@ -318,7 +318,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { @Override public void internalEncodeKeyValues(DataOutputStream out, - ByteBuffer in, boolean includesMemstoreTS) throws IOException { + ByteBuffer in, HFileBlockDefaultEncodingContext encodingCtx) throws IOException { in.rewind(); ByteBufferUtils.putInt(out, in.limit()); DiffCompressionState previousState = new DiffCompressionState(); @@ -326,7 +326,7 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { while (in.hasRemaining()) { compressSingleKeyValue(previousState, currentState, out, in); - afterEncodingKeyValue(in, out, includesMemstoreTS); + afterEncodingKeyValue(in, out, encodingCtx); // swap previousState <-> currentState DiffCompressionState tmp = previousState; @@ -335,26 +335,6 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { } } - @Override - public ByteBuffer decodeKeyValues(DataInputStream source, - int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS) - throws IOException { - int decompressedSize = source.readInt(); - ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + - allocHeaderLength); - buffer.position(allocHeaderLength); - DiffCompressionState state = new DiffCompressionState(); - while (source.available() > skipLastBytes) { - uncompressSingleKeyValue(source, buffer, state); - afterDecodingKeyValue(source, buffer, includesMemstoreTS); - } - - if (source.available() != skipLastBytes) { - throw new IllegalStateException("Read too much bytes."); - } - - return buffer; - } @Override public ByteBuffer getFirstKeyInBlock(ByteBuffer block) { @@ -424,8 +404,8 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { @Override public EncodedSeeker createSeeker(KVComparator comparator, - final boolean includesMemstoreTS) { - return new BufferedEncodedSeeker(comparator) { + HFileBlockDecodingContext decodingCtx) { + return new BufferedEncodedSeeker(comparator, decodingCtx) { private byte[] familyNameWithSize; private static final int TIMESTAMP_WITH_TYPE_LENGTH = Bytes.SIZEOF_LONG + Bytes.SIZEOF_BYTE; @@ -517,7 +497,10 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { current.valueOffset = currentBuffer.position(); ByteBufferUtils.skip(currentBuffer, current.valueLength); - if (includesMemstoreTS) { + if (includesTags()) { + decodeTags(); + } + if (includesMvcc()) { current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); } else { current.memstoreTS = 0; @@ -549,4 +532,24 @@ public class DiffKeyDeltaEncoder extends BufferedDataBlockEncoder { } }; } + + @Override + public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength, + int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException { + int decompressedSize = source.readInt(); + ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + + allocateHeaderLength); + buffer.position(allocateHeaderLength); + DiffCompressionState state = new DiffCompressionState(); + while (source.available() > skipLastBytes) { + uncompressSingleKeyValue(source, buffer, state); + 
afterDecodingKeyValue(source, buffer, decodingCtx); + } + + if (source.available() != skipLastBytes) { + throw new IllegalStateException("Read too much bytes."); + } + + return buffer; + } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java index 92c5fe5ea45..6cd168ab535 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/EncodedDataBlock.java @@ -29,8 +29,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.hfile.HFileContext; +import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.compress.Compressor; @@ -48,25 +49,26 @@ public class EncodedDataBlock { private DataBlockEncoder dataBlockEncoder; private byte[] cachedEncodedData; - private boolean includesMemstoreTS; private final HFileBlockEncodingContext encodingCtx; + private HFileContext meta; /** * Create a buffer which will be encoded using dataBlockEncoder. * @param dataBlockEncoder Algorithm used for compression. * @param encoding encoding type used * @param rawKVs + * @param meta */ - public EncodedDataBlock(DataBlockEncoder dataBlockEncoder, - boolean includesMemstoreTS, DataBlockEncoding encoding, byte[] rawKVs) { + public EncodedDataBlock(DataBlockEncoder dataBlockEncoder, DataBlockEncoding encoding, + byte[] rawKVs, HFileContext meta) { Preconditions.checkNotNull(encoding, "Cannot create encoded data block with null encoder"); this.dataBlockEncoder = dataBlockEncoder; - encodingCtx = - dataBlockEncoder.newDataBlockEncodingContext(Compression.Algorithm.NONE, - encoding, HConstants.HFILEBLOCK_DUMMY_HEADER); + encodingCtx = dataBlockEncoder.newDataBlockEncodingContext(encoding, + HConstants.HFILEBLOCK_DUMMY_HEADER, meta); this.rawKVs = rawKVs; + this.meta = meta; } /** @@ -97,19 +99,30 @@ public class EncodedDataBlock { public Cell next() { if (decompressedData == null) { try { - decompressedData = dataBlockEncoder.decodeKeyValues( - dis, includesMemstoreTS); + decompressedData = dataBlockEncoder.decodeKeyValues(dis, dataBlockEncoder + .newDataBlockDecodingContext(meta)); } catch (IOException e) { throw new RuntimeException("Problem with data block encoder, " + "most likely it requested more bytes than are available.", e); } decompressedData.rewind(); } - int offset = decompressedData.position(); - KeyValue kv = new KeyValue(decompressedData.array(), offset); - decompressedData.position(offset + kv.getLength()); - + int klen = decompressedData.getInt(); + int vlen = decompressedData.getInt(); + short tagsLen = 0; + ByteBufferUtils.skip(decompressedData, klen + vlen); + // Read the tag length in case when steam contain tags + if (meta.shouldIncludeTags()) { + tagsLen = decompressedData.getShort(); + ByteBufferUtils.skip(decompressedData, tagsLen); + } + KeyValue kv = new KeyValue(decompressedData.array(), offset, + (int) KeyValue.getKeyValueDataStructureSize(klen, vlen, tagsLen)); + if (meta.shouldIncludeMvcc()) { + long mvccVersion = ByteBufferUtils.readVLong(decompressedData); + kv.setMvccVersion(mvccVersion); + } return kv; } @@ 
-199,7 +212,7 @@ public class EncodedDataBlock { public byte[] encodeData() { try { this.dataBlockEncoder.encodeKeyValues( - getUncompressedBuffer(), includesMemstoreTS, encodingCtx); + getUncompressedBuffer(), encodingCtx); } catch (IOException e) { throw new RuntimeException(String.format( "Bug in encoding part of algorithm %s. " + diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java index 175735563b9..559db7c0d92 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/FastDiffDeltaEncoder.java @@ -343,8 +343,8 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder { } @Override - public void internalEncodeKeyValues(DataOutputStream out, - ByteBuffer in, boolean includesMemstoreTS) throws IOException { + public void internalEncodeKeyValues(DataOutputStream out, ByteBuffer in, + HFileBlockDefaultEncodingContext encodingCtx) throws IOException { in.rewind(); ByteBufferUtils.putInt(out, in.limit()); FastDiffCompressionState previousState = new FastDiffCompressionState(); @@ -352,7 +352,7 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder { while (in.hasRemaining()) { compressSingleKeyValue(previousState, currentState, out, in); - afterEncodingKeyValue(in, out, includesMemstoreTS); + afterEncodingKeyValue(in, out, encodingCtx); // swap previousState <-> currentState FastDiffCompressionState tmp = previousState; @@ -362,17 +362,16 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder { } @Override - public ByteBuffer decodeKeyValues(DataInputStream source, - int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS) - throws IOException { + public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength, + int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException { int decompressedSize = source.readInt(); ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + - allocHeaderLength); - buffer.position(allocHeaderLength); + allocateHeaderLength); + buffer.position(allocateHeaderLength); FastDiffCompressionState state = new FastDiffCompressionState(); while (source.available() > skipLastBytes) { uncompressSingleKeyValue(source, buffer, state); - afterDecodingKeyValue(source, buffer, includesMemstoreTS); + afterDecodingKeyValue(source, buffer, decodingCtx); } if (source.available() != skipLastBytes) { @@ -419,8 +418,8 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder { @Override public EncodedSeeker createSeeker(KVComparator comparator, - final boolean includesMemstoreTS) { - return new BufferedEncodedSeeker(comparator) { + final HFileBlockDecodingContext decodingCtx) { + return new BufferedEncodedSeeker(comparator, decodingCtx) { private void decode(boolean isFirst) { byte flag = currentBuffer.get(); if ((flag & FLAG_SAME_KEY_LENGTH) == 0) { @@ -520,7 +519,10 @@ public class FastDiffDeltaEncoder extends BufferedDataBlockEncoder { ByteBufferUtils.skip(currentBuffer, current.valueLength); } - if (includesMemstoreTS) { + if (includesTags()) { + decodeTags(); + } + if (includesMvcc()) { current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); } else { current.memstoreTS = 0; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDecodingContext.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDecodingContext.java index 75064be9d1e..84879dedf24 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDecodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDecodingContext.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.hfile.HFileContext; /** * A decoding context that is created by a reader's encoder, and is shared @@ -31,14 +31,9 @@ import org.apache.hadoop.hbase.io.compress.Compression; @InterfaceAudience.Private public interface HFileBlockDecodingContext { - /** - * @return the compression algorithm used by this decoding context - */ - Compression.Algorithm getCompression(); - /** * Perform all actions that need to be done before the encoder's real decoding process. - * Decompression needs to be done if {@link #getCompression()} returns a valid compression + * Decompression needs to be done if {@link HFileContext#getCompression()} returns a valid compression * algorithm. * * @param onDiskSizeWithoutHeader numBytes after block and encoding headers @@ -57,4 +52,8 @@ public interface HFileBlockDecodingContext { int offset ) throws IOException; + /** + * @return HFile meta information + */ + HFileContext getHFileContext(); } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java index 81794f391fd..26e28e242b4 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultDecodingContext.java @@ -24,7 +24,7 @@ import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.compress.Compression; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.hfile.HFileContext; /** * A default implementation of {@link HFileBlockDecodingContext}. 
It assumes the @@ -37,11 +37,10 @@ import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; public class HFileBlockDefaultDecodingContext implements HFileBlockDecodingContext { - private final Compression.Algorithm compressAlgo; - - public HFileBlockDefaultDecodingContext( - Compression.Algorithm compressAlgo) { - this.compressAlgo = compressAlgo; + private final HFileContext fileContext; + + public HFileBlockDefaultDecodingContext(HFileContext fileContext) { + this.fileContext = fileContext; } @Override @@ -52,12 +51,11 @@ public class HFileBlockDefaultDecodingContext implements Compression.decompress(blockBufferWithoutHeader.array(), blockBufferWithoutHeader.arrayOffset(), (InputStream) dis, onDiskSizeWithoutHeader, - uncompressedSizeWithoutHeader, compressAlgo); + uncompressedSizeWithoutHeader, this.fileContext.getCompression()); } @Override - public Algorithm getCompression() { - return compressAlgo; + public HFileContext getHFileContext() { + return this.fileContext; } - } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java index 43fc82346d6..df95ce2cb0e 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockDefaultEncodingContext.java @@ -24,8 +24,8 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.compress.Compression; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.io.compress.CompressionOutputStream; import org.apache.hadoop.io.compress.Compressor; @@ -56,26 +56,25 @@ public class HFileBlockDefaultEncodingContext implements /** Underlying stream to write compressed bytes to */ private ByteArrayOutputStream compressedByteStream; - /** Compression algorithm for all blocks this instance writes. */ - private final Compression.Algorithm compressionAlgorithm; - private ByteArrayOutputStream encodedStream = new ByteArrayOutputStream(); private DataOutputStream dataOut = new DataOutputStream(encodedStream); private byte[] dummyHeader; + private HFileContext fileContext; + /** - * @param compressionAlgorithm compression algorithm used * @param encoding encoding used * @param headerBytes dummy header bytes + * @param fileContext HFile meta data */ - public HFileBlockDefaultEncodingContext( - Compression.Algorithm compressionAlgorithm, - DataBlockEncoding encoding, byte[] headerBytes) { + public HFileBlockDefaultEncodingContext(DataBlockEncoding encoding, byte[] headerBytes, + HFileContext fileContext) { this.encodingAlgo = encoding; - this.compressionAlgorithm = - compressionAlgorithm == null ? NONE : compressionAlgorithm; - if (this.compressionAlgorithm != NONE) { + Compression.Algorithm compressionAlgorithm = + fileContext.getCompression() == null ? 
NONE : fileContext.getCompression(); + this.fileContext = fileContext; + if (compressionAlgorithm != NONE) { compressor = compressionAlgorithm.getCompressor(); compressedByteStream = new ByteArrayOutputStream(); try { @@ -137,7 +136,7 @@ public class HFileBlockDefaultEncodingContext implements protected void compressAfterEncoding(byte[] uncompressedBytesWithHeader, BlockType blockType, byte[] headerBytes) throws IOException { this.uncompressedBytesWithHeader = uncompressedBytesWithHeader; - if (compressionAlgorithm != NONE) { + if (this.fileContext.getCompression() != NONE) { compressedByteStream.reset(); compressedByteStream.write(headerBytes); compressionStream.resetState(); @@ -176,16 +175,11 @@ public class HFileBlockDefaultEncodingContext implements @Override public void close() { if (compressor != null) { - compressionAlgorithm.returnCompressor(compressor); + this.fileContext.getCompression().returnCompressor(compressor); compressor = null; } } - @Override - public Algorithm getCompression() { - return this.compressionAlgorithm; - } - public DataOutputStream getOutputStreamForEncoder() { return this.dataOut; } @@ -194,4 +188,9 @@ public class HFileBlockDefaultEncodingContext implements public DataBlockEncoding getDataBlockEncoding() { return this.encodingAlgo; } + + @Override + public HFileContext getHFileContext() { + return this.fileContext; + } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java index 66a9cfcead2..2fa9112a4bf 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/HFileBlockEncodingContext.java @@ -20,8 +20,8 @@ import java.io.IOException; import java.io.OutputStream; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.HFileContext; /** * An encoding context that is created by a writer's encoder, and is shared @@ -55,11 +55,6 @@ public interface HFileBlockEncodingContext { */ BlockType getBlockType(); - /** - * @return the compression algorithm used by this encoding context - */ - Compression.Algorithm getCompression(); - /** * sets the dummy header bytes */ @@ -72,8 +67,7 @@ public interface HFileBlockEncodingContext { /** * Do any action that needs to be performed after the encoding. 
- * Compression is also included if {@link #getCompression()} returns non-null - * compression algorithm + * Compression is also included if a non-null compression algorithm is used * * @param blockType * @throws IOException @@ -85,4 +79,8 @@ public interface HFileBlockEncodingContext { */ void close(); + /** + * @return HFile context information + */ + HFileContext getHFileContext(); } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java index 74c09b5ee00..e8a6c4957db 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/PrefixKeyDeltaEncoder.java @@ -76,8 +76,8 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder { } @Override - public void internalEncodeKeyValues(DataOutputStream writeHere, - ByteBuffer in, boolean includesMemstoreTS) throws IOException { + public void internalEncodeKeyValues(DataOutputStream writeHere, ByteBuffer in, + HFileBlockDefaultEncodingContext encodingCtx) throws IOException { in.rewind(); ByteBufferUtils.putInt(writeHere, in.limit()); int prevOffset = -1; @@ -86,24 +86,23 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder { while (in.hasRemaining()) { offset = in.position(); keyLength = addKV(prevOffset, writeHere, in, keyLength); - afterEncodingKeyValue(in, writeHere, includesMemstoreTS); + afterEncodingKeyValue(in, writeHere, encodingCtx); prevOffset = offset; } } @Override - public ByteBuffer decodeKeyValues(DataInputStream source, - int allocHeaderLength, int skipLastBytes, boolean includesMemstoreTS) - throws IOException { + public ByteBuffer internalDecodeKeyValues(DataInputStream source, int allocateHeaderLength, + int skipLastBytes, HFileBlockDefaultDecodingContext decodingCtx) throws IOException { int decompressedSize = source.readInt(); ByteBuffer buffer = ByteBuffer.allocate(decompressedSize + - allocHeaderLength); - buffer.position(allocHeaderLength); + allocateHeaderLength); + buffer.position(allocateHeaderLength); int prevKeyOffset = 0; while (source.available() > skipLastBytes) { prevKeyOffset = decodeKeyValue(source, buffer, prevKeyOffset); - afterDecodingKeyValue(source, buffer, includesMemstoreTS); + afterDecodingKeyValue(source, buffer, decodingCtx); } if (source.available() != skipLastBytes) { @@ -166,8 +165,8 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder { @Override public EncodedSeeker createSeeker(KVComparator comparator, - final boolean includesMemstoreTS) { - return new BufferedEncodedSeeker(comparator) { + final HFileBlockDecodingContext decodingCtx) { + return new BufferedEncodedSeeker(comparator, decodingCtx) { @Override protected void decodeNext() { current.keyLength = ByteBufferUtils.readCompressedInt(currentBuffer); @@ -180,7 +179,10 @@ public class PrefixKeyDeltaEncoder extends BufferedDataBlockEncoder { current.keyLength - current.lastCommonPrefix); current.valueOffset = currentBuffer.position(); ByteBufferUtils.skip(currentBuffer, current.valueLength); - if (includesMemstoreTS) { + if (includesTags()) { + decodeTags(); + } + if (includesMvcc()) { current.memstoreTS = ByteBufferUtils.readVLong(currentBuffer); } else { current.memstoreTS = 0; diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java 
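The new file below, HFileContext, carries the per-file metadata (checksum settings, compression, block size, encodings, mvcc and tags) that the refactored encoding and decoding contexts above now consult. As a hedged illustration of the setter-based construction it exposes, a writer-side setup might look like the following sketch; the concrete values (GZ compression, FAST_DIFF encoding, 64 KB blocks) are arbitrary choices, not defaults mandated by the patch, and tags are only honoured when hfile.format.version is set to 3 as the integration test later in this patch does.

import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.ChecksumType;

public class HFileContextSetupSketch {

  // Gathers the knobs that used to travel as loose booleans and algorithms
  // into a single HFileContext, ready to hand to newDataBlockEncodingContext().
  public static HFileContext newWriteContext() {
    HFileContext context = new HFileContext();
    context.setUsesHBaseChecksum(true);
    context.setChecksumType(ChecksumType.CRC32);
    context.setBytesPerChecksum(HFileContext.DEFAULT_BYTES_PER_CHECKSUM);
    context.setBlocksize(64 * 1024);
    context.setCompressAlgo(Algorithm.GZ);
    context.setIncludesMvcc(true);
    context.setIncludesTags(true);      // only honoured with HFile version 3
    context.setEncodingOnDisk(DataBlockEncoding.FAST_DIFF);
    context.setEncodingInCache(DataBlockEncoding.FAST_DIFF);
    return context;
  }
}

Because the class is mutable, the clone() method in the class body below lets a reader or writer take a private copy rather than sharing one instance.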
new file mode 100644 index 00000000000..04fb5a531a9 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.hfile; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ChecksumType; +import org.apache.hadoop.hbase.util.ClassSize; + +/** + * This carries the information on some of the meta data about the HFile. This + * meta data would be used across the HFileWriter/Readers and the HFileBlocks. + * This would help to add new information to the HFile. + * This class is not meant to be immutable. + */ +@InterfaceAudience.Private +public class HFileContext implements HeapSize, Cloneable { + + public static final int DEFAULT_BYTES_PER_CHECKSUM = 16 * 1024; + public static final ChecksumType DEFAULT_CHECKSUM_TYPE = ChecksumType.CRC32; + + /** Whether checksum is enabled or not**/ + private boolean usesHBaseChecksum = true; + /** Whether mvcc is to be included in the Read/Write**/ + private boolean includesMvcc = true; + /**Whether tags are to be included in the Read/Write**/ + private boolean includesTags; + /**Compression algorithm used**/ + private Algorithm compressAlgo = Algorithm.NONE; + /** Whether tags to be compressed or not**/ + private boolean compressTags; + /** the checksum type **/ + private ChecksumType checksumType = DEFAULT_CHECKSUM_TYPE; + /** the number of bytes per checksum value **/ + private int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM; + /** Number of uncompressed bytes we allow per block. */ + private int blocksize = HConstants.DEFAULT_BLOCKSIZE; + private DataBlockEncoding encodingOnDisk = DataBlockEncoding.NONE; + private DataBlockEncoding encodingInCache = DataBlockEncoding.NONE; + + //Empty constructor. 
Go with setters + public HFileContext() { + } + + public Algorithm getCompression() { + return compressAlgo; + } + + public void setCompressAlgo(Algorithm compressAlgo) { + this.compressAlgo = compressAlgo; + } + + public boolean shouldUseHBaseChecksum() { + return usesHBaseChecksum; + } + + public void setUsesHBaseChecksum(boolean usesHBaseChecksum) { + this.usesHBaseChecksum = usesHBaseChecksum; + } + + public boolean shouldIncludeMvcc() { + return includesMvcc; + } + + public void setIncludesMvcc(boolean includesMvcc) { + this.includesMvcc = includesMvcc; + } + + public boolean shouldIncludeTags() { + return includesTags; + } + + public void setIncludesTags(boolean includesTags) { + this.includesTags = includesTags; + } + + public boolean shouldCompressTags() { + return compressTags; + } + + public void setCompressTags(boolean compressTags) { + this.compressTags = compressTags; + } + + public ChecksumType getChecksumType() { + return checksumType; + } + + public void setChecksumType(ChecksumType checksumType) { + this.checksumType = checksumType; + } + + public int getBytesPerChecksum() { + return bytesPerChecksum; + } + + public void setBytesPerChecksum(int bytesPerChecksum) { + this.bytesPerChecksum = bytesPerChecksum; + } + + public int getBlocksize() { + return blocksize; + } + + public void setBlocksize(int blocksize) { + this.blocksize = blocksize; + } + + public DataBlockEncoding getEncodingOnDisk() { + return encodingOnDisk; + } + + public void setEncodingOnDisk(DataBlockEncoding encodingOnDisk) { + this.encodingOnDisk = encodingOnDisk; + } + + public DataBlockEncoding getEncodingInCache() { + return encodingInCache; + } + + public void setEncodingInCache(DataBlockEncoding encodingInCache) { + this.encodingInCache = encodingInCache; + } + + /** + * HeapSize implementation + * NOTE : The heapsize should be altered as and when new state variable are added + * @return heap size of the HFileContext + */ + @Override + public long heapSize() { + long size = ClassSize.align(ClassSize.OBJECT + + // Algorithm reference, encodingondisk, encodingincache, checksumtype + 4 * ClassSize.REFERENCE + + 2 * Bytes.SIZEOF_INT + + // usesHBaseChecksum, includesMvcc, includesTags and compressTags + 4 * Bytes.SIZEOF_BOOLEAN); + return size; + } + + @Override + public HFileContext clone() { + HFileContext clonnedCtx = new HFileContext(); + clonnedCtx.usesHBaseChecksum = this.usesHBaseChecksum; + clonnedCtx.includesMvcc = this.includesMvcc; + clonnedCtx.includesTags = this.includesTags; + clonnedCtx.compressAlgo = this.compressAlgo; + clonnedCtx.compressTags = this.compressTags; + clonnedCtx.checksumType = this.checksumType; + clonnedCtx.bytesPerChecksum = this.bytesPerChecksum; + clonnedCtx.blocksize = this.blocksize; + clonnedCtx.encodingOnDisk = this.encodingOnDisk; + clonnedCtx.encodingInCache = this.encodingInCache; + return clonnedCtx; + } +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumFactory.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumFactory.java new file mode 100644 index 00000000000..4fc09e103a6 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumFactory.java @@ -0,0 +1,97 @@ +/** + * Copyright The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.util; + +import java.io.IOException; +import java.lang.ClassNotFoundException; +import java.util.zip.Checksum; +import java.lang.reflect.Constructor; + +/** + * Utility class that is used to generate a Checksum object. + * The Checksum implementation is pluggable and an application + * can specify their own class that implements their own + * Checksum algorithm. + */ +public class ChecksumFactory { + + static private final Class[] EMPTY_ARRAY = new Class[]{}; + + /** + * Create a new instance of a Checksum object. + * @return The newly created Checksum object + */ + static public Checksum newInstance(String className) throws IOException { + try { + Class clazz = getClassByName(className); + return (Checksum)newInstance(clazz); + } catch (ClassNotFoundException e) { + throw new IOException(e); + } + } + + /** + * Returns a Constructor that can be used to create a Checksum object. + * @param className classname for which an constructor is created + * @return a new Constructor object + */ + static public Constructor newConstructor(String className) + throws IOException { + try { + Class clazz = getClassByName(className); + Constructor ctor = clazz.getDeclaredConstructor(EMPTY_ARRAY); + ctor.setAccessible(true); + return ctor; + } catch (ClassNotFoundException e) { + throw new IOException(e); + } catch (java.lang.NoSuchMethodException e) { + throw new IOException(e); + } + } + + /** Create an object for the given class and initialize it from conf + * + * @param theClass class of which an object is created + * @return a new object + */ + static private T newInstance(Class theClass) { + T result; + try { + Constructor ctor = theClass.getDeclaredConstructor(EMPTY_ARRAY); + ctor.setAccessible(true); + result = ctor.newInstance(); + } catch (Exception e) { + throw new RuntimeException(e); + } + return result; + } + + /** + * Load a class by name. + * @param name the class name. + * @return the class object. + * @throws ClassNotFoundException if the class is not found. + */ + static private Class getClassByName(String name) + throws ClassNotFoundException { + ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); + return Class.forName(name, true, classLoader); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java new file mode 100644 index 00000000000..63d3a2e0dfc --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.util; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.util.zip.Checksum; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +/** + * Checksum types. The Checksum type is a one byte number + * that stores a representation of the checksum algorithm + * used to encode a hfile. The ordinal of these cannot + * change or else you risk breaking all existing HFiles out there. + */ +public enum ChecksumType { + + NULL((byte)0) { + @Override + public String getName() { + return "NULL"; + } + @Override + public void initialize() { + // do nothing + } + @Override + public Checksum getChecksumObject() throws IOException { + return null; // checksums not used + } + }, + + CRC32((byte)1) { + private volatile Constructor ctor; + + @Override + public String getName() { + return "CRC32"; + } + + @Override + public void initialize() { + final String PURECRC32 = "org.apache.hadoop.util.PureJavaCrc32"; + final String JDKCRC = "java.util.zip.CRC32"; + LOG = LogFactory.getLog(ChecksumType.class); + + // check if hadoop library is available + try { + ctor = ChecksumFactory.newConstructor(PURECRC32); + LOG.info("Checksum using " + PURECRC32); + } catch (Exception e) { + LOG.trace(PURECRC32 + " not available."); + } + try { + // The default checksum class name is java.util.zip.CRC32. + // This is available on all JVMs. 
+ if (ctor == null) { + ctor = ChecksumFactory.newConstructor(JDKCRC); + LOG.info("Checksum can use " + JDKCRC); + } + } catch (Exception e) { + LOG.trace(JDKCRC + " not available."); + } + } + + @Override + public Checksum getChecksumObject() throws IOException { + if (ctor == null) { + throw new IOException("Bad constructor for " + getName()); + } + try { + return (Checksum)ctor.newInstance(); + } catch (Exception e) { + throw new IOException(e); + } + } + }, + + CRC32C((byte)2) { + private transient Constructor ctor; + + @Override + public String getName() { + return "CRC32C"; + } + + @Override + public void initialize() { + final String PURECRC32C = "org.apache.hadoop.util.PureJavaCrc32C"; + LOG = LogFactory.getLog(ChecksumType.class); + try { + ctor = ChecksumFactory.newConstructor(PURECRC32C); + LOG.info("Checksum can use " + PURECRC32C); + } catch (Exception e) { + LOG.trace(PURECRC32C + " not available."); + } + } + + @Override + public Checksum getChecksumObject() throws IOException { + if (ctor == null) { + throw new IOException("Bad constructor for " + getName()); + } + try { + return (Checksum)ctor.newInstance(); + } catch (Exception e) { + throw new IOException(e); + } + } + }; + + private final byte code; + protected Log LOG; + + /** initializes the relevant checksum class object */ + abstract void initialize(); + + /** returns the name of this checksum type */ + public abstract String getName(); + + private ChecksumType(final byte c) { + this.code = c; + initialize(); + } + + /** returns a object that can be used to generate/validate checksums */ + public abstract Checksum getChecksumObject() throws IOException; + + public byte getCode() { + return this.code; + } + + /** + * Cannot rely on enum ordinals . They change if item is removed or moved. + * Do our own codes. + * @param b + * @return Type associated with passed code. + */ + public static ChecksumType codeToType(final byte b) { + for (ChecksumType t : ChecksumType.values()) { + if (t.getCode() == b) { + return t; + } + } + throw new RuntimeException("Unknown checksum type code " + b); + } + + /** + * Map a checksum name to a specific type. + * Do our own names. + * @param name + * @return Type associated with passed code. + */ + public static ChecksumType nameToType(final String name) { + for (ChecksumType t : ChecksumType.values()) { + if (t.getName().equals(name)) { + return t; + } + } + throw new RuntimeException("Unknown checksum type name " + name); + } +} diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java index 48fa3e9961a..33b42b3f087 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/test/RedundantKVGenerator.java @@ -26,6 +26,7 @@ import java.util.Random; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.io.WritableUtils; @@ -200,6 +201,14 @@ public class RedundantKVGenerator { * @return sorted list of key values */ public List generateTestKeyValues(int howMany) { + return generateTestKeyValues(howMany, false); + } + /** + * Generate test data useful to test encoders. + * @param howMany How many Key values should be generated. 
+ * @return sorted list of key values + */ + public List generateTestKeyValues(int howMany, boolean useTags) { List result = new ArrayList(); List rows = generateRows(); @@ -267,7 +276,12 @@ public class RedundantKVGenerator { randomizer.nextBytes(value); } - result.add(new KeyValue(row, family, qualifier, timestamp, value)); + if (useTags) { + result.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] { new Tag( + (byte) 1, "value1") })); + } else { + result.add(new KeyValue(row, family, qualifier, timestamp, value)); + } } Collections.sort(result, KeyValue.COMPARATOR); @@ -297,7 +311,6 @@ public class RedundantKVGenerator { ByteBufferUtils.writeVLong(result, kv.getMvccVersion()); } } - return result; } diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java index 1c95d40facd..f0df4720e5a 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestKeyValue.java @@ -20,6 +20,8 @@ package org.apache.hadoop.hbase; import java.io.IOException; import java.util.Arrays; +import java.util.Iterator; +import java.util.List; import java.util.Set; import java.util.TreeSet; @@ -526,4 +528,63 @@ public class TestKeyValue extends TestCase { Bytes.equals(newKey, KeyValue.ROW_LENGTH_SIZE, newRowLength, expectedArray, 0, expectedArray.length); } + + public void testKVsWithTags() { + byte[] row = Bytes.toBytes("myRow"); + byte[] cf = Bytes.toBytes("myCF"); + byte[] q = Bytes.toBytes("myQualifier"); + byte[] value = Bytes.toBytes("myValue"); + byte[] metaValue1 = Bytes.toBytes("metaValue1"); + byte[] metaValue2 = Bytes.toBytes("metaValue2"); + KeyValue kv = new KeyValue(row, cf, q, HConstants.LATEST_TIMESTAMP, value, new Tag[] { + new Tag((byte) 1, metaValue1), new Tag((byte) 2, metaValue2) }); + assertTrue(kv.getTagsLength() > 0); + assertTrue(Bytes.equals(kv.getRow(), row)); + assertTrue(Bytes.equals(kv.getFamily(), cf)); + assertTrue(Bytes.equals(kv.getQualifier(), q)); + assertTrue(Bytes.equals(kv.getValue(), value)); + List tags = kv.getTags(); + assertNotNull(tags); + assertEquals(2, tags.size()); + boolean meta1Ok = false, meta2Ok = false; + for (Tag tag : tags) { + if (tag.getType() == (byte) 1) { + if (Bytes.equals(tag.getValue(), metaValue1)) { + meta1Ok = true; + } + } else { + if (Bytes.equals(tag.getValue(), metaValue2)) { + meta2Ok = true; + } + } + } + assertTrue(meta1Ok); + assertTrue(meta2Ok); + + Iterator tagItr = kv.tagsIterator(); + assertTrue(tagItr.hasNext()); + Tag next = tagItr.next(); + assertEquals(10, next.getTagLength()); + assertEquals((byte) 1, next.getType()); + Bytes.equals(next.getValue(), metaValue1); + assertTrue(tagItr.hasNext()); + next = tagItr.next(); + assertEquals(10, next.getTagLength()); + assertEquals((byte) 2, next.getType()); + Bytes.equals(next.getValue(), metaValue2); + assertFalse(tagItr.hasNext()); + + tagItr = kv.tagsIterator(); + assertTrue(tagItr.hasNext()); + next = tagItr.next(); + assertEquals(10, next.getTagLength()); + assertEquals((byte) 1, next.getType()); + Bytes.equals(next.getValue(), metaValue1); + assertTrue(tagItr.hasNext()); + next = tagItr.next(); + assertEquals(10, next.getTagLength()); + assertEquals((byte) 2, next.getType()); + Bytes.equals(next.getValue(), metaValue2); + assertFalse(tagItr.hasNext()); + } } diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/codec/TestCellCodec.java 
b/hbase-common/src/test/java/org/apache/hadoop/hbase/codec/TestCellCodec.java index 3ed6efc0e0d..7b8d62ce962 100644 --- a/hbase-common/src/test/java/org/apache/hadoop/hbase/codec/TestCellCodec.java +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/codec/TestCellCodec.java @@ -27,8 +27,10 @@ import java.io.IOException; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.codec.CellCodec; import org.apache.hadoop.hbase.codec.Codec; import org.apache.hadoop.hbase.util.Bytes; @@ -122,4 +124,47 @@ public class TestCellCodec { dis.close(); assertEquals(offset, cis.getCount()); } -} + + @Test + public void testThreeWithTag() throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + CountingOutputStream cos = new CountingOutputStream(baos); + DataOutputStream dos = new DataOutputStream(cos); + Codec codec = new CellCodec(); + Codec.Encoder encoder = codec.getEncoder(dos); + final KeyValue kv1 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("1"), + HConstants.LATEST_TIMESTAMP, Bytes.toBytes("1"), new Tag[] { + new Tag((byte) 1, Bytes.toBytes("teststring1")), + new Tag((byte) 2, Bytes.toBytes("testString2")) }); + final KeyValue kv2 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("2"), + HConstants.LATEST_TIMESTAMP, Bytes.toBytes("2"), new Tag[] { new Tag((byte) 1, + Bytes.toBytes("teststring3")), }); + final KeyValue kv3 = new KeyValue(Bytes.toBytes("r"), Bytes.toBytes("f"), Bytes.toBytes("3"), + HConstants.LATEST_TIMESTAMP, Bytes.toBytes("3"), new Tag[] { + new Tag((byte) 2, Bytes.toBytes("teststring4")), + new Tag((byte) 2, Bytes.toBytes("teststring5")), + new Tag((byte) 1, Bytes.toBytes("teststring6")) }); + + encoder.write(kv1); + encoder.write(kv2); + encoder.write(kv3); + encoder.flush(); + dos.close(); + long offset = cos.getCount(); + CountingInputStream cis = new CountingInputStream(new ByteArrayInputStream(baos.toByteArray())); + DataInputStream dis = new DataInputStream(cis); + Codec.Decoder decoder = codec.getDecoder(dis); + assertTrue(decoder.advance()); + Cell c = decoder.current(); + assertTrue(CellComparator.equals(c, kv1)); + assertTrue(decoder.advance()); + c = decoder.current(); + assertTrue(CellComparator.equals(c, kv2)); + assertTrue(decoder.advance()); + c = decoder.current(); + assertTrue(CellComparator.equals(c, kv3)); + assertFalse(decoder.advance()); + dis.close(); + assertEquals(offset, cis.getCount()); + } +} \ No newline at end of file diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java index 037cbe081a6..497c12a3bbd 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngest.java @@ -47,7 +47,7 @@ public class IntegrationTestIngest extends IntegrationTestBase { protected static final Log LOG = LogFactory.getLog(IntegrationTestIngest.class); protected IntegrationTestingUtility util; protected HBaseCluster cluster; - private LoadTestTool loadTool; + protected LoadTestTool loadTool; protected void setUp(int numSlavesBase) throws Exception { util = getTestingUtil(null); @@ -84,7 +84,7 @@ public class IntegrationTestIngest extends IntegrationTestBase { @Test public void 
internalRunIngestTest() throws Exception { - runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 1024, 10); + runIngestTest(DEFAULT_RUN_TIME, 2500, 10, 1024, 10, false, 10); } @Override @@ -104,7 +104,7 @@ public class IntegrationTestIngest extends IntegrationTestBase { } protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, - int colsPerKey, int recordSize, int writeThreads) throws Exception { + int colsPerKey, int recordSize, int writeThreads, boolean useTags, int maxTagsPerKey) throws Exception { LOG.info("Running ingest"); LOG.info("Cluster size:" + util.getHBaseClusterInterface().getClusterStatus().getServersSize()); @@ -118,39 +118,46 @@ public class IntegrationTestIngest extends IntegrationTestBase { LOG.info("Intended run time: " + (runtime/60000) + " min, left:" + ((runtime - (System.currentTimeMillis() - start))/60000) + " min"); - int ret = loadTool.run(new String[] { - "-tn", getTablename(), - "-write", String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), - "-start_key", String.valueOf(startKey), - "-num_keys", String.valueOf(numKeys), - "-skip_init" - }); + int ret = -1; + if (useTags) { + ret = loadTool.run(new String[] { "-tn", getTablename(), "-write", + String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), "-start_key", + String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init", + "-usetags", "-num_tags", String.format("1:%d", maxTagsPerKey) }); + } else { + ret = loadTool.run(new String[] { "-tn", getTablename(), "-write", + String.format("%d:%d:%d", colsPerKey, recordSize, writeThreads), "-start_key", + String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init" }); + } if (0 != ret) { String errorMsg = "Load failed with error code " + ret; LOG.error(errorMsg); Assert.fail(errorMsg); } - ret = loadTool.run(new String[] { - "-tn", getTablename(), - "-update", String.format("60:%d", writeThreads), - "-start_key", String.valueOf(startKey), - "-num_keys", String.valueOf(numKeys), - "-skip_init" - }); + if (useTags) { + ret = loadTool.run(new String[] { "-tn", getTablename(), "-update", + String.format("60:%d", writeThreads), "-start_key", String.valueOf(startKey), + "-num_keys", String.valueOf(numKeys), "-skip_init", "-usetags", "-num_tags", + String.format("1:%d", maxTagsPerKey) }); + } else { + ret = loadTool.run(new String[] { "-tn", getTablename(), "-update", + String.format("60:%d", writeThreads), "-start_key", String.valueOf(startKey), + "-num_keys", String.valueOf(numKeys), "-skip_init" }); + } if (0 != ret) { String errorMsg = "Update failed with error code " + ret; LOG.error(errorMsg); Assert.fail(errorMsg); } - - ret = loadTool.run(new String[] { - "-tn", getTablename(), - "-read", "100:20", - "-start_key", String.valueOf(startKey), - "-num_keys", String.valueOf(numKeys), - "-skip_init" - }); + if (useTags) { + ret = loadTool.run(new String[] { "-tn", getTablename(), "-read", "100:20", "-start_key", + String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init", + "-usetags", "-num_tags", String.format("1:%d", maxTagsPerKey) }); + } else { + ret = loadTool.run(new String[] { "-tn", getTablename(), "-read", "100:20", "-start_key", + String.valueOf(startKey), "-num_keys", String.valueOf(numKeys), "-skip_init" }); + } if (0 != ret) { String errorMsg = "Verification failed with error code " + ret; LOG.error(errorMsg); diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithTags.java 
b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithTags.java new file mode 100644 index 00000000000..6b32975394b --- /dev/null +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestIngestWithTags.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import org.junit.Before; +import org.junit.experimental.categories.Category; + +@Category(IntegrationTests.class) +public class IntegrationTestIngestWithTags extends IntegrationTestIngest { + @Before + @Override + public void setUp() throws Exception { + getTestingUtil(conf).getConfiguration().setInt("hfile.format.version", 3); + super.setUp(); + } + + @Override + protected void runIngestTest(long defaultRunTime, int keysPerServerPerIter, int colsPerKey, + int recordSize, int writeThreads, boolean useTags, int maxTagsPerKey) throws Exception { + super.runIngestTest(defaultRunTime, keysPerServerPerIter, colsPerKey, recordSize, writeThreads, + true, 10); + } +} diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestLazyCfLoading.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestLazyCfLoading.java index 381aff7d501..c3e0122277e 100644 --- a/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestLazyCfLoading.java +++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/IntegrationTestLazyCfLoading.java @@ -230,7 +230,8 @@ public class IntegrationTestLazyCfLoading { writer.setMultiPut(true); LOG.info("Starting writer; the number of keys to write is " + keysToWrite); - writer.start(1, keysToWrite, WRITER_THREADS); + // TODO : Need to see if tag support has to be given here in the integration test suite + writer.start(1, keysToWrite, WRITER_THREADS, false, 0, 0); // Now, do scans. 
long now = EnvironmentEdgeManager.currentTimeMillis(); diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java index e8375eaac28..b209529ec8b 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/PrefixTreeCodec.java @@ -34,7 +34,6 @@ import org.apache.hadoop.hbase.codec.prefixtree.decode.PrefixTreeArraySearcher; import org.apache.hadoop.hbase.codec.prefixtree.encode.EncoderFactory; import org.apache.hadoop.hbase.codec.prefixtree.encode.PrefixTreeEncoder; import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; @@ -42,8 +41,8 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; import org.apache.hadoop.hbase.io.hfile.BlockType; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.util.ByteBufferUtils; -import org.apache.hadoop.io.RawComparator; /** * This class is created via reflection in DataBlockEncoding enum. Update the enum if class name or @@ -69,7 +68,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{ * enough with the concept of the HFileBlockEncodingContext. */ @Override - public void encodeKeyValues(ByteBuffer in, boolean includesMvccVersion, + public void encodeKeyValues(ByteBuffer in, HFileBlockEncodingContext blkEncodingCtx) throws IOException { if (blkEncodingCtx.getClass() != HFileBlockDefaultEncodingContext.class) { throw new IOException(this.getClass().getName() + " only accepts " @@ -80,7 +79,8 @@ public class PrefixTreeCodec implements DataBlockEncoder{ = (HFileBlockDefaultEncodingContext) blkEncodingCtx; encodingCtx.prepareEncoding(); DataOutputStream dataOut = encodingCtx.getOutputStreamForEncoder(); - internalEncodeKeyValues(dataOut, in, includesMvccVersion); + internalEncodeKeyValues(dataOut, in, encodingCtx.getHFileContext().shouldIncludeMvcc(), + encodingCtx.getHFileContext().shouldIncludeTags()); //do i need to check this, or will it always be DataBlockEncoding.PREFIX_TREE? 
if (encodingCtx.getDataBlockEncoding() != DataBlockEncoding.NONE) { @@ -91,26 +91,26 @@ public class PrefixTreeCodec implements DataBlockEncoder{ } private void internalEncodeKeyValues(DataOutputStream encodedOutputStream, - ByteBuffer rawKeyValues, boolean includesMvccVersion) throws IOException { + ByteBuffer rawKeyValues, boolean includesMvccVersion, boolean includesTag) throws IOException { rawKeyValues.rewind(); PrefixTreeEncoder builder = EncoderFactory.checkOut(encodedOutputStream, includesMvccVersion); - try{ + try { KeyValue kv; - while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion)) != null) { + while ((kv = KeyValueUtil.nextShallowCopy(rawKeyValues, includesMvccVersion, includesTag)) != null) { builder.write(kv); } builder.flush(); - }finally{ + } finally { EncoderFactory.checkIn(builder); } } @Override - public ByteBuffer decodeKeyValues(DataInputStream source, boolean includesMvccVersion) + public ByteBuffer decodeKeyValues(DataInputStream source, HFileBlockDecodingContext decodingCtx) throws IOException { - return decodeKeyValues(source, 0, 0, includesMvccVersion); + return decodeKeyValues(source, 0, 0, decodingCtx); } @@ -118,9 +118,8 @@ public class PrefixTreeCodec implements DataBlockEncoder{ * I don't think this method is called during normal HBase operation, so efficiency is not * important. */ - @Override public ByteBuffer decodeKeyValues(DataInputStream source, int allocateHeaderLength, - int skipLastBytes, boolean includesMvccVersion) throws IOException { + int skipLastBytes, HFileBlockDecodingContext decodingCtx) throws IOException { ByteBuffer sourceAsBuffer = ByteBufferUtils.drainInputStreamToBuffer(source);// waste sourceAsBuffer.mark(); PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(sourceAsBuffer); @@ -131,17 +130,19 @@ public class PrefixTreeCodec implements DataBlockEncoder{ result.rewind(); CellSearcher searcher = null; try { - searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvccVersion); + boolean includesMvcc = decodingCtx.getHFileContext().shouldIncludeMvcc(); + searcher = DecoderFactory.checkOut(sourceAsBuffer, includesMvcc); while (searcher.advance()) { KeyValue currentCell = KeyValueUtil.copyToNewKeyValue(searcher.current()); // needs to be modified for DirectByteBuffers. no existing methods to // write VLongs to byte[] int offset = result.arrayOffset() + result.position(); - KeyValueUtil.appendToByteArray(currentCell, result.array(), offset); + System.arraycopy(currentCell.getBuffer(), currentCell.getOffset(), result.array(), offset, + currentCell.getLength()); int keyValueLength = KeyValueUtil.length(currentCell); ByteBufferUtils.skip(result, keyValueLength); offset += keyValueLength; - if (includesMvccVersion) { + if (includesMvcc) { ByteBufferUtils.writeVLong(result, currentCell.getMvccVersion()); } } @@ -158,7 +159,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{ block.rewind(); PrefixTreeArraySearcher searcher = null; try { - //should i includeMemstoreTS (second argument)? i think PrefixKeyDeltaEncoder is, so i will + // should i includeMemstoreTS (second argument)? 
i think PrefixKeyDeltaEncoder is, so i will searcher = DecoderFactory.checkOut(block, true); if (!searcher.positionAtFirstCell()) { return null; @@ -170,19 +171,19 @@ public class PrefixTreeCodec implements DataBlockEncoder{ } @Override - public HFileBlockEncodingContext newDataBlockEncodingContext(Algorithm compressionAlgorithm, - DataBlockEncoding encoding, byte[] header) { + public HFileBlockEncodingContext newDataBlockEncodingContext( + DataBlockEncoding encoding, byte[] header, HFileContext meta) { if(DataBlockEncoding.PREFIX_TREE != encoding){ //i'm not sure why encoding is in the interface. Each encoder implementation should probably //know it's encoding type throw new IllegalArgumentException("only DataBlockEncoding.PREFIX_TREE supported"); } - return new HFileBlockDefaultEncodingContext(compressionAlgorithm, encoding, header); + return new HFileBlockDefaultEncodingContext(encoding, header, meta); } @Override - public HFileBlockDecodingContext newDataBlockDecodingContext(Algorithm compressionAlgorithm) { - return new HFileBlockDefaultDecodingContext(compressionAlgorithm); + public HFileBlockDecodingContext newDataBlockDecodingContext(HFileContext meta) { + return new HFileBlockDefaultDecodingContext(meta); } /** @@ -190,7 +191,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{ * the way to this point. */ @Override - public EncodedSeeker createSeeker(KVComparator comparator, boolean includesMvccVersion) { + public EncodedSeeker createSeeker(KVComparator comparator, HFileBlockDecodingContext decodingCtx) { if (comparator instanceof RawBytesComparator){ throw new IllegalArgumentException("comparator must be KeyValue.KeyComparator"); } else if (comparator instanceof MetaComparator){ @@ -198,7 +199,7 @@ public class PrefixTreeCodec implements DataBlockEncoder{ +"table"); } - return new PrefixTreeSeeker(includesMvccVersion); + return new PrefixTreeSeeker(decodingCtx.getHFileContext().shouldIncludeMvcc()); } } diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java index 90c22b700e8..22086a9ef94 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/DecoderFactory.java @@ -59,13 +59,13 @@ public class DecoderFactory { /**************************** helper ******************************/ - public static PrefixTreeArraySearcher ensureArraySearcherValid(ByteBuffer buffer, PrefixTreeArraySearcher searcher, boolean includeMvccVersion) { if (searcher == null) { PrefixTreeBlockMeta blockMeta = new PrefixTreeBlockMeta(buffer); searcher = new PrefixTreeArraySearcher(blockMeta, blockMeta.getRowTreeDepth(), - blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength()); + blockMeta.getMaxRowLength(), blockMeta.getMaxQualifierLength(), + blockMeta.getMaxTagsLength()); searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); return searcher; } @@ -78,8 +78,9 @@ public class DecoderFactory { int rowBufferLength = Math.max(blockMeta.getMaxRowLength(), searcher.getRowBufferLength()); int qualifierBufferLength = Math.max(blockMeta.getMaxQualifierLength(), searcher.getQualifierBufferLength()); + int tagBufferLength = Math.max(blockMeta.getMaxTagsLength(), searcher.getTagBufferLength()); searcher = new PrefixTreeArraySearcher(blockMeta, maxRowTreeStackNodes, rowBufferLength, - 
qualifierBufferLength); + qualifierBufferLength, tagBufferLength); } //this is where we parse the BlockMeta searcher.initOnBlock(blockMeta, buffer.array(), includeMvccVersion); diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java index 5e1f48b08d6..34ceb8fa54f 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayReversibleScanner.java @@ -33,8 +33,8 @@ public class PrefixTreeArrayReversibleScanner extends PrefixTreeArrayScanner imp /***************** construct ******************************/ public PrefixTreeArrayReversibleScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, - int rowBufferLength, int qualifierBufferLength) { - super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); + int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) { + super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength); } diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java index 6cb670f2ef1..13269837a32 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArrayScanner.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader; import org.apache.hadoop.hbase.codec.prefixtree.decode.row.RowNodeReader; import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.MvccVersionDecoder; import org.apache.hadoop.hbase.codec.prefixtree.decode.timestamp.TimestampDecoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType; +import org.apache.hadoop.hbase.util.Bytes; /** * Extends PtCell and manipulates its protected fields. 
Could alternatively contain a PtCell and @@ -53,6 +55,7 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne protected RowNodeReader currentRowNode; protected ColumnReader familyReader; protected ColumnReader qualifierReader; + protected ColumnReader tagsReader; protected TimestampDecoder timestampDecoder; protected MvccVersionDecoder mvccVersionDecoder; @@ -63,17 +66,19 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne /*********************** construct ******************************/ // pass in blockMeta so we can initialize buffers big enough for all cells in the block - public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, - int rowBufferLength, int qualifierBufferLength) { + public PrefixTreeArrayScanner(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, + int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) { this.rowNodes = new RowNodeReader[rowTreeDepth]; for (int i = 0; i < rowNodes.length; ++i) { rowNodes[i] = new RowNodeReader(); } this.rowBuffer = new byte[rowBufferLength]; this.familyBuffer = new byte[PrefixTreeBlockMeta.MAX_FAMILY_LENGTH]; - this.familyReader = new ColumnReader(familyBuffer, true); + this.familyReader = new ColumnReader(familyBuffer, ColumnNodeType.FAMILY); this.qualifierBuffer = new byte[qualifierBufferLength]; - this.qualifierReader = new ColumnReader(qualifierBuffer, false); + this.tagsBuffer = new byte[tagsBufferLength]; + this.qualifierReader = new ColumnReader(qualifierBuffer, ColumnNodeType.QUALIFIER); + this.tagsReader = new ColumnReader(tagsBuffer, ColumnNodeType.TAGS); this.timestampDecoder = new TimestampDecoder(); this.mvccVersionDecoder = new MvccVersionDecoder(); } @@ -95,6 +100,9 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne if (qualifierBuffer.length < blockMeta.getMaxQualifierLength()) { return false; } + if(tagsBuffer.length < blockMeta.getMaxTagsLength()) { + return false; + } return true; } @@ -106,6 +114,8 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne this.familyReader.initOnBlock(blockMeta, block); this.qualifierOffset = qualifierBuffer.length; this.qualifierReader.initOnBlock(blockMeta, block); + this.tagsOffset = tagsBuffer.length; + this.tagsReader.initOnBlock(blockMeta, block); this.timestampDecoder.initOnBlock(blockMeta, block); this.mvccVersionDecoder.initOnBlock(blockMeta, block); this.includeMvccVersion = includeMvccVersion; @@ -129,6 +139,8 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne type = DEFAULT_TYPE; absoluteValueOffset = 0;//use 0 vs -1 so the cell is valid when value hasn't been initialized valueLength = 0;// had it at -1, but that causes null Cell to add up to the wrong length + tagsOffset = blockMeta.getMaxTagsLength(); + tagsLength = 0; } /** @@ -427,6 +439,10 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne currentCellIndex = cellIndex; populateFamily(); populateQualifier(); + // Read tags only if there are tags in the meta + if(blockMeta.getNumTagsBytes() != 0) { + populateTag(); + } populateTimestamp(); populateMvccVersion(); populateType(); @@ -445,6 +461,12 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne qualifierLength = qualifierReader.getColumnLength(); } + protected void populateTag() { + int tagTreeIndex = currentRowNode.getTagOffset(currentCellIndex, blockMeta); + tagsOffset = 
tagsReader.populateBuffer(tagTreeIndex).getColumnOffset(); + tagsLength = (short)tagsReader.getColumnLength(); + } + protected void populateTimestamp() { if (blockMeta.isAllSameTimestamp()) { timestamp = blockMeta.getMinTimestamp(); @@ -480,7 +502,6 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne valueLength = currentRowNode.getValueLength(currentCellIndex, blockMeta); } - /**************** getters ***************************/ public byte[] getTreeBytes() { @@ -503,4 +524,8 @@ public class PrefixTreeArrayScanner extends PrefixTreeCell implements CellScanne return qualifierBuffer.length; } + public int getTagBufferLength() { + return tagsBuffer.length; + } + } diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java index 1bb35eb0070..1f89c552c19 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeArraySearcher.java @@ -48,8 +48,8 @@ public class PrefixTreeArraySearcher extends PrefixTreeArrayReversibleScanner im /*************** construct ******************************/ public PrefixTreeArraySearcher(PrefixTreeBlockMeta blockMeta, int rowTreeDepth, - int rowBufferLength, int qualifierBufferLength) { - super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength); + int rowBufferLength, int qualifierBufferLength, int tagsBufferLength) { + super(blockMeta, rowTreeDepth, rowBufferLength, qualifierBufferLength, tagsBufferLength); } diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java index 33afc51d355..390e8027059 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/PrefixTreeCell.java @@ -70,6 +70,9 @@ public class PrefixTreeCell implements Cell, Comparable { protected int absoluteValueOffset; protected int valueLength; + protected byte[] tagsBuffer; + protected int tagsOffset; + protected short tagsLength; /********************** Cell methods ******************/ @@ -217,17 +220,17 @@ public class PrefixTreeCell implements Cell, Comparable { @Override public int getTagsOffset() { - throw new UnsupportedOperationException("Not implemented"); + return tagsOffset; } @Override public short getTagsLength() { - throw new UnsupportedOperationException("Not implemented"); + return tagsLength; } @Override public byte[] getTagsArray() { - throw new UnsupportedOperationException("Not implemented"); + return this.tagsBuffer; } } diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java index e9cf05d2b92..1a6c72ffaed 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnNodeReader.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.codec.prefixtree.decode.column; import 
org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType; import org.apache.hadoop.hbase.util.vint.UFIntTool; import org.apache.hadoop.hbase.util.vint.UVIntTool; @@ -30,9 +31,8 @@ public class ColumnNodeReader { protected PrefixTreeBlockMeta blockMeta; protected byte[] block; - + protected ColumnNodeType nodeType; protected byte[] columnBuffer; - protected boolean familyVsQualifier; protected int offsetIntoBlock; @@ -43,9 +43,9 @@ public class ColumnNodeReader { /************** construct *************************/ - public ColumnNodeReader(byte[] columnBuffer, boolean familyVsQualifier) { + public ColumnNodeReader(byte[] columnBuffer, ColumnNodeType nodeType) { this.columnBuffer = columnBuffer; - this.familyVsQualifier = familyVsQualifier; + this.nodeType = nodeType; } public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { @@ -62,10 +62,12 @@ public class ColumnNodeReader { tokenOffsetIntoBlock = offsetIntoBlock + UVIntTool.numBytes(tokenLength); int parentStartPositionIndex = tokenOffsetIntoBlock + tokenLength; int offsetWidth; - if (familyVsQualifier) { + if(nodeType == ColumnNodeType.FAMILY) { offsetWidth = blockMeta.getFamilyOffsetWidth(); - } else { + } else if(nodeType == ColumnNodeType.QUALIFIER) { offsetWidth = blockMeta.getQualifierOffsetWidth(); + } else { + offsetWidth = blockMeta.getTagsOffsetWidth(); } parentStartPosition = (int) UFIntTool.fromBytes(block, parentStartPositionIndex, offsetWidth); } @@ -75,10 +77,12 @@ public class ColumnNodeReader { } public boolean isRoot() { - if (familyVsQualifier) { + if (nodeType == ColumnNodeType.FAMILY) { return offsetIntoBlock == blockMeta.getAbsoluteFamilyOffset(); - } else { + } else if (nodeType == ColumnNodeType.QUALIFIER) { return offsetIntoBlock == blockMeta.getAbsoluteQualifierOffset(); + } else { + return offsetIntoBlock == blockMeta.getAbsoluteTagsOffset(); } } diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java index 2b04a4be840..279009eb6b4 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/column/ColumnReader.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.codec.prefixtree.decode.column; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType; /** * Position one of these appropriately in the data block and you can call its methods to retrieve @@ -35,17 +36,17 @@ public class ColumnReader { protected byte[] columnBuffer; protected int columnOffset; protected int columnLength; - protected boolean familyVsQualifier; + protected ColumnNodeType nodeType; protected ColumnNodeReader columnNodeReader; /******************** construct *******************/ - public ColumnReader(byte[] columnBuffer, boolean familyVsQualifier) { + public ColumnReader(byte[] columnBuffer, ColumnNodeType nodeType) { this.columnBuffer = columnBuffer; - this.familyVsQualifier = familyVsQualifier; - this.columnNodeReader = new ColumnNodeReader(columnBuffer, familyVsQualifier); + this.nodeType = nodeType; + this.columnNodeReader = 
new ColumnNodeReader(columnBuffer, nodeType); } public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block) { @@ -61,11 +62,13 @@ public class ColumnReader { clearColumnBuffer(); int nextRelativeOffset = offsetIntoColumnData; while (true) { - int absoluteOffset; - if (familyVsQualifier) { + int absoluteOffset = 0; + if (nodeType == ColumnNodeType.FAMILY) { absoluteOffset = blockMeta.getAbsoluteFamilyOffset() + nextRelativeOffset; - } else { + } else if (nodeType == ColumnNodeType.QUALIFIER) { absoluteOffset = blockMeta.getAbsoluteQualifierOffset() + nextRelativeOffset; + } else { + absoluteOffset = blockMeta.getAbsoluteTagsOffset() + nextRelativeOffset; } columnNodeReader.positionAt(absoluteOffset); columnOffset -= columnNodeReader.getTokenLength(); diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java index efd6c05b36c..638f4fa1c34 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/decode/row/RowNodeReader.java @@ -20,8 +20,8 @@ package org.apache.hadoop.hbase.codec.prefixtree.decode.row; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; -import org.apache.hadoop.hbase.util.SimpleByteRange; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.SimpleByteRange; import org.apache.hadoop.hbase.util.vint.UFIntTool; import org.apache.hadoop.hbase.util.vint.UVIntTool; @@ -52,13 +52,14 @@ public class RowNodeReader { protected int operationTypesOffset; protected int valueOffsetsOffset; protected int valueLengthsOffset; + protected int tagOffsetsOffset; protected int nextNodeOffsetsOffset; /******************* construct **************************/ public void initOnBlock(PrefixTreeBlockMeta blockMeta, byte[] block, int offset) { - this.block = block; + this.block = block; this.offset = offset; resetFanIndex(); @@ -73,8 +74,15 @@ public class RowNodeReader { this.familyOffsetsOffset = fanOffset + fanOut + UVIntTool.numBytes(numCells); this.qualifierOffsetsOffset = familyOffsetsOffset + numCells * blockMeta.getFamilyOffsetWidth(); - this.timestampIndexesOffset = qualifierOffsetsOffset + numCells - * blockMeta.getQualifierOffsetWidth(); + this.tagOffsetsOffset = this.qualifierOffsetsOffset + numCells * blockMeta.getQualifierOffsetWidth(); + // TODO : This code may not be needed now..As we always consider tags to be present + if(blockMeta.getTagsOffsetWidth() == 0) { + // Make both of them same so that we know that there are no tags + this.tagOffsetsOffset = this.qualifierOffsetsOffset; + this.timestampIndexesOffset = qualifierOffsetsOffset + numCells * blockMeta.getQualifierOffsetWidth(); + } else { + this.timestampIndexesOffset = tagOffsetsOffset + numCells * blockMeta.getTagsOffsetWidth(); + } this.mvccVersionIndexesOffset = timestampIndexesOffset + numCells * blockMeta.getTimestampIndexWidth(); this.operationTypesOffset = mvccVersionIndexesOffset + numCells @@ -134,6 +142,12 @@ public class RowNodeReader { return (int) UFIntTool.fromBytes(block, startIndex, fIntWidth); } + public int getTagOffset(int index, PrefixTreeBlockMeta blockMeta) { + int fIntWidth = blockMeta.getTagsOffsetWidth(); + int startIndex = tagOffsetsOffset + fIntWidth * index; + return (int) 
UFIntTool.fromBytes(block, startIndex, fIntWidth); + } + public int getTimestampIndex(int index, PrefixTreeBlockMeta blockMeta) { int fIntWidth = blockMeta.getTimestampIndexWidth(); int startIndex = timestampIndexesOffset + fIntWidth * index; diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java index 6971d8b96d4..ca3c8041593 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/PrefixTreeEncoder.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; import org.apache.hadoop.hbase.codec.prefixtree.encode.other.CellTypeEncoder; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType; import org.apache.hadoop.hbase.codec.prefixtree.encode.other.LongEncoder; import org.apache.hadoop.hbase.codec.prefixtree.encode.row.RowSectionWriter; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; @@ -42,7 +43,6 @@ import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet; import org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet; import org.apache.hadoop.hbase.util.vint.UFIntTool; import org.apache.hadoop.io.WritableUtils; - /** * This is the primary class for converting a CellOutputStream into an encoded byte[]. As Cells are * added they are completely copied into the various encoding structures. This is important because @@ -86,6 +86,7 @@ public class PrefixTreeEncoder implements CellOutputStream { protected ByteRange rowRange; protected ByteRange familyRange; protected ByteRange qualifierRange; + protected ByteRange tagsRange; /* * incoming Cell fields are copied into these arrays @@ -94,7 +95,9 @@ public class PrefixTreeEncoder implements CellOutputStream { protected long[] mvccVersions; protected byte[] typeBytes; protected int[] valueOffsets; + protected int[] tagsOffsets; protected byte[] values; + protected byte[] tags; protected PrefixTreeBlockMeta blockMeta; @@ -114,7 +117,7 @@ public class PrefixTreeEncoder implements CellOutputStream { */ protected ByteRangeSet familyDeduplicator; protected ByteRangeSet qualifierDeduplicator; - + protected ByteRangeSet tagsDeduplicator; /* * Feed sorted byte[]s into these tokenizers which will convert the byte[]s to an in-memory * trie structure with nodes connected by memory pointers (not serializable yet). @@ -122,6 +125,7 @@ public class PrefixTreeEncoder implements CellOutputStream { protected Tokenizer rowTokenizer; protected Tokenizer familyTokenizer; protected Tokenizer qualifierTokenizer; + protected Tokenizer tagsTokenizer; /* * Writers take an in-memory trie, sort the nodes, calculate offsets and lengths, and write @@ -130,6 +134,7 @@ public class PrefixTreeEncoder implements CellOutputStream { protected RowSectionWriter rowWriter; protected ColumnSectionWriter familyWriter; protected ColumnSectionWriter qualifierWriter; + protected ColumnSectionWriter tagsWriter; /* * Integers used for counting cells and bytes. 
We keep track of the size of the Cells as if they @@ -138,7 +143,9 @@ public class PrefixTreeEncoder implements CellOutputStream { protected int totalCells = 0; protected int totalUnencodedBytes = 0;//numBytes if the cells were KeyValues protected int totalValueBytes = 0; + protected int totalTagBytes = 0; protected int maxValueLength = 0; + protected int maxTagLength = 0; protected int totalBytes = 0;// @@ -170,6 +177,7 @@ public class PrefixTreeEncoder implements CellOutputStream { this.rowWriter = new RowSectionWriter(); this.familyWriter = new ColumnSectionWriter(); this.qualifierWriter = new ColumnSectionWriter(); + initializeTagHelpers(); reset(outputStream, includeMvccVersion); } @@ -179,9 +187,11 @@ public class PrefixTreeEncoder implements CellOutputStream { this.includeMvccVersion = includeMvccVersion; this.outputStream = outputStream; valueOffsets[0] = 0; - familyDeduplicator.reset(); qualifierDeduplicator.reset(); + tagsDeduplicator.reset(); + tagsWriter.reset(); + tagsTokenizer.reset(); rowTokenizer.reset(); timestampEncoder.reset(); mvccVersionEncoder.reset(); @@ -199,6 +209,14 @@ public class PrefixTreeEncoder implements CellOutputStream { totalBytes = 0; } + protected void initializeTagHelpers() { + this.tagsRange = new SimpleByteRange(); + this.tagsDeduplicator = USE_HASH_COLUMN_SORTER ? new ByteRangeHashSet() + : new ByteRangeTreeSet(); + this.tagsTokenizer = new Tokenizer(); + this.tagsWriter = new ColumnSectionWriter(); + } + /** * Check that the arrays used to hold cell fragments are large enough for the cell that is being * added. Since the PrefixTreeEncoder is cached between uses, these arrays may grow during the @@ -259,10 +277,16 @@ public class PrefixTreeEncoder implements CellOutputStream { rowTokenizer.addSorted(CellUtil.fillRowRange(cell, rowRange)); addFamilyPart(cell); addQualifierPart(cell); + addTagPart(cell); addAfterRowFamilyQualifier(cell); } + private void addTagPart(Cell cell) { + CellUtil.fillTagRange(cell, tagsRange); + tagsDeduplicator.add(tagsRange); + } + /***************** internal add methods ************************/ private void addAfterRowFamilyQualifier(Cell cell){ @@ -333,6 +357,7 @@ public class PrefixTreeEncoder implements CellOutputStream { rowWriter.writeBytes(outputStream); familyWriter.writeBytes(outputStream); qualifierWriter.writeBytes(outputStream); + tagsWriter.writeBytes(outputStream); timestampEncoder.writeBytes(outputStream); mvccVersionEncoder.writeBytes(outputStream); //CellType bytes are in the row nodes. 
there is no additional type section @@ -349,12 +374,13 @@ public class PrefixTreeEncoder implements CellOutputStream { blockMeta.setValueOffsetWidth(UFIntTool.numBytes(lastValueOffset)); blockMeta.setValueLengthWidth(UFIntTool.numBytes(maxValueLength)); blockMeta.setNumValueBytes(totalValueBytes); - totalBytes += totalValueBytes; + totalBytes += totalTagBytes + totalValueBytes; //these compile methods will add to totalBytes compileTypes(); compileMvccVersions(); compileTimestamps(); + compileTags(); compileQualifiers(); compileFamilies(); compileRows(); @@ -397,7 +423,7 @@ public class PrefixTreeEncoder implements CellOutputStream { blockMeta.setNumUniqueQualifiers(qualifierDeduplicator.size()); qualifierDeduplicator.compile(); qualifierTokenizer.addAll(qualifierDeduplicator.getSortedRanges()); - qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, false); + qualifierWriter.reconstruct(blockMeta, qualifierTokenizer, ColumnNodeType.QUALIFIER); qualifierWriter.compile(); int numQualifierBytes = qualifierWriter.getNumBytes(); blockMeta.setNumQualifierBytes(numQualifierBytes); @@ -408,13 +434,24 @@ public class PrefixTreeEncoder implements CellOutputStream { blockMeta.setNumUniqueFamilies(familyDeduplicator.size()); familyDeduplicator.compile(); familyTokenizer.addAll(familyDeduplicator.getSortedRanges()); - familyWriter.reconstruct(blockMeta, familyTokenizer, true); + familyWriter.reconstruct(blockMeta, familyTokenizer, ColumnNodeType.FAMILY); familyWriter.compile(); int numFamilyBytes = familyWriter.getNumBytes(); blockMeta.setNumFamilyBytes(numFamilyBytes); totalBytes += numFamilyBytes; } + protected void compileTags() { + blockMeta.setNumUniqueTags(tagsDeduplicator.size()); + tagsDeduplicator.compile(); + tagsTokenizer.addAll(tagsDeduplicator.getSortedRanges()); + tagsWriter.reconstruct(blockMeta, tagsTokenizer, ColumnNodeType.TAGS); + tagsWriter.compile(); + int numTagBytes = tagsWriter.getNumBytes(); + blockMeta.setNumTagsBytes(numTagBytes); + totalBytes += numTagBytes; + } + protected void compileRows() { rowWriter.reconstruct(this); rowWriter.compile(); @@ -476,6 +513,10 @@ public class PrefixTreeEncoder implements CellOutputStream { return qualifierDeduplicator; } + public ByteRangeSet getTagSorter() { + return tagsDeduplicator; + } + public ColumnSectionWriter getFamilyWriter() { return familyWriter; } @@ -484,6 +525,10 @@ public class PrefixTreeEncoder implements CellOutputStream { return qualifierWriter; } + public ColumnSectionWriter getTagWriter() { + return tagsWriter; + } + public RowSectionWriter getRowWriter() { return rowWriter; } diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java index 0105f8f0f92..966495b8b7f 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnNodeWriter.java @@ -23,6 +23,7 @@ import java.io.OutputStream; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; import org.apache.hadoop.hbase.util.ByteRange; import org.apache.hadoop.hbase.util.Bytes; @@ -48,20 +49,19 @@ public class 
ColumnNodeWriter{ protected TokenizerNode builderNode; protected PrefixTreeBlockMeta blockMeta; - protected boolean familyVsQualifier; - protected int tokenLength; protected byte[] token; protected int parentStartPosition; + protected ColumnNodeType nodeType; /*************** construct **************************/ public ColumnNodeWriter(PrefixTreeBlockMeta blockMeta, TokenizerNode builderNode, - boolean familyVsQualifier) { + ColumnNodeType nodeType) { this.blockMeta = blockMeta; this.builderNode = builderNode; - this.familyVsQualifier = familyVsQualifier; + this.nodeType = nodeType; calculateTokenLength(); } @@ -93,10 +93,12 @@ public class ColumnNodeWriter{ public void writeBytes(OutputStream os) throws IOException { int parentOffsetWidth; - if (familyVsQualifier) { + if (this.nodeType == ColumnNodeType.FAMILY) { parentOffsetWidth = blockMeta.getFamilyOffsetWidth(); - } else { + } else if (this.nodeType == ColumnNodeType.QUALIFIER) { parentOffsetWidth = blockMeta.getQualifierOffsetWidth(); + } else { + parentOffsetWidth = blockMeta.getTagsOffsetWidth(); } UVIntTool.writeBytes(tokenLength, os); os.write(token); diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java index 122ffb4828d..49b1533224f 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/column/ColumnSectionWriter.java @@ -25,6 +25,7 @@ import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; import org.apache.hadoop.hbase.util.CollectionUtils; @@ -60,7 +61,7 @@ public class ColumnSectionWriter { private PrefixTreeBlockMeta blockMeta; - private boolean familyVsQualifier; + private ColumnNodeType nodeType; private Tokenizer tokenizer; private int numBytes = 0; private ArrayList nonLeaves; @@ -79,16 +80,16 @@ public class ColumnSectionWriter { } public ColumnSectionWriter(PrefixTreeBlockMeta blockMeta, Tokenizer builder, - boolean familyVsQualifier) { + ColumnNodeType nodeType) { this();// init collections - reconstruct(blockMeta, builder, familyVsQualifier); + reconstruct(blockMeta, builder, nodeType); } public void reconstruct(PrefixTreeBlockMeta blockMeta, Tokenizer builder, - boolean familyVsQualifier) { + ColumnNodeType nodeType) { this.blockMeta = blockMeta; this.tokenizer = builder; - this.familyVsQualifier = familyVsQualifier; + this.nodeType = nodeType; } public void reset() { @@ -102,14 +103,19 @@ public class ColumnSectionWriter { /****************** methods *******************************/ public ColumnSectionWriter compile() { - if (familyVsQualifier) { + if (this.nodeType == ColumnNodeType.FAMILY) { // do nothing. 
max family length fixed at Byte.MAX_VALUE - } else { + } else if (this.nodeType == ColumnNodeType.QUALIFIER) { blockMeta.setMaxQualifierLength(tokenizer.getMaxElementLength()); + } else { + blockMeta.setMaxTagsLength(tokenizer.getMaxElementLength()); } + compilerInternals(); + return this; + } + protected void compilerInternals() { tokenizer.setNodeFirstInsertionIndexes(); - tokenizer.appendNodes(nonLeaves, true, false); tokenizer.appendNodes(leaves, false, true); @@ -121,7 +127,7 @@ ColumnSectionWriter { columnNodeWriters = Lists.newArrayListWithCapacity(CollectionUtils.nullSafeSize(allNodes)); for (int i = 0; i < allNodes.size(); ++i) { TokenizerNode node = allNodes.get(i); - columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, familyVsQualifier)); + columnNodeWriters.add(new ColumnNodeWriter(blockMeta, node, this.nodeType)); } // leaf widths are known at this point, so add them up @@ -142,10 +148,12 @@ public class ColumnSectionWriter { break; }// it fits } - if (familyVsQualifier) { + if (this.nodeType == ColumnNodeType.FAMILY) { blockMeta.setFamilyOffsetWidth(parentOffsetWidth); - } else { + } else if (this.nodeType == ColumnNodeType.QUALIFIER) { blockMeta.setQualifierOffsetWidth(parentOffsetWidth); + } else { + blockMeta.setTagsOffsetWidth(parentOffsetWidth); } int forwardIndex = 0; @@ -165,8 +173,6 @@ public class ColumnSectionWriter { } tokenizer.appendOutputArrayOffsets(outputArrayOffsets); - - return this; } public void writeBytes(OutputStream os) throws IOException { diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/ColumnNodeType.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/ColumnNodeType.java new file mode 100644 index 00000000000..8946cc10234 --- /dev/null +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/other/ColumnNodeType.java @@ -0,0 +1,28 @@ +package org.apache.hadoop.hbase.codec.prefixtree.encode.other; + +import org.apache.hadoop.classification.InterfaceAudience; + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Specifies the type of column node writer. 
+ */ +@InterfaceAudience.Private +public enum ColumnNodeType { + FAMILY, QUALIFIER, TAGS; +} \ No newline at end of file diff --git a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java index d253e392b86..ca8e59ffd8d 100644 --- a/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java +++ b/hbase-prefix-tree/src/main/java/org/apache/hadoop/hbase/codec/prefixtree/encode/row/RowNodeWriter.java @@ -105,6 +105,7 @@ public class RowNodeWriter{ if(tokenizerNode.hasOccurrences()){ int fixedBytesPerCell = blockMeta.getFamilyOffsetWidth() + blockMeta.getQualifierOffsetWidth() + + blockMeta.getTagsOffsetWidth() + blockMeta.getTimestampIndexWidth() + blockMeta.getMvccVersionIndexWidth() + blockMeta.getKeyValueTypeWidth() @@ -132,12 +133,12 @@ public class RowNodeWriter{ //UFInt indexes and offsets for each cell in the row (if nub or leaf) writeFamilyNodeOffsets(os); writeQualifierNodeOffsets(os); + writeTagNodeOffsets(os); writeTimestampIndexes(os); writeMvccVersionIndexes(os); writeCellTypes(os); writeValueOffsets(os); writeValueLengths(os); - //offsets to the children of this row trie node (if branch or nub) writeNextRowTrieNodeOffsets(os); } @@ -220,6 +221,20 @@ public class RowNodeWriter{ } } + protected void writeTagNodeOffsets(OutputStream os) throws IOException { + if (blockMeta.getTagsOffsetWidth() <= 0) { + return; + } + for (int i = 0; i < numCells; ++i) { + int cellInsertionIndex = tokenizerNode.getFirstInsertionIndex() + i; + int sortedIndex = prefixTreeEncoder.getTagSorter().getSortedIndexForInsertionId( + cellInsertionIndex); + int indexedTagOffset = prefixTreeEncoder.getTagWriter().getOutputArrayOffset( + sortedIndex); + UFIntTool.writeBytes(blockMeta.getTagsOffsetWidth(), indexedTagOffset, os); + } + } + protected void writeTimestampIndexes(OutputStream os) throws IOException { if (blockMeta.getTimestampIndexWidth() <= 0) { return; @@ -270,7 +285,6 @@ public class RowNodeWriter{ } } - /** * If a branch or a nub, the last thing we append are the UFInt offsets to the child row nodes. 
*/ diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java index 5bc418618ab..0623aa18a3f 100644 --- a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/keyvalue/TestKeyValueTool.java @@ -25,6 +25,8 @@ import java.util.List; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueTestUtil; import org.apache.hadoop.hbase.codec.prefixtree.row.TestRowData; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags; import org.junit.Assert; import org.junit.Test; import org.junit.runner.RunWith; @@ -47,9 +49,12 @@ public class TestKeyValueTool { @Test public void testRoundTripToBytes() { + if(rows instanceof TestRowDataTrivialWithTags || rows instanceof TestRowDataRandomKeyValuesWithTags) { + return; + } List kvs = rows.getInputs(); ByteBuffer bb = KeyValueTestUtil.toByteBufferAndRewind(kvs, false); - List roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false); + List roundTrippedKvs = KeyValueTestUtil.rewindThenToList(bb, false, false); Assert.assertArrayEquals(kvs.toArray(), roundTrippedKvs.toArray()); } } diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java index 71b5b1c9320..8766a2d6609 100644 --- a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/column/TestColumnBuilder.java @@ -26,6 +26,7 @@ import java.util.List; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; import org.apache.hadoop.hbase.codec.prefixtree.decode.column.ColumnReader; import org.apache.hadoop.hbase.codec.prefixtree.encode.column.ColumnSectionWriter; +import org.apache.hadoop.hbase.codec.prefixtree.encode.other.ColumnNodeType; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.Tokenizer; import org.apache.hadoop.hbase.codec.prefixtree.encode.tokenize.TokenizerNode; import org.apache.hadoop.hbase.util.ByteRange; @@ -92,12 +93,12 @@ public class TestColumnBuilder { } Assert.assertEquals(sortedUniqueColumns.size(), builderOutputArrays.size()); - writer = new ColumnSectionWriter(blockMeta, builder, false); + writer = new ColumnSectionWriter(blockMeta, builder, ColumnNodeType.QUALIFIER); ByteArrayOutputStream baos = new ByteArrayOutputStream(); writer.compile().writeBytes(baos); bytes = baos.toByteArray(); buffer = new byte[blockMeta.getMaxQualifierLength()]; - reader = new ColumnReader(buffer, false); + reader = new ColumnReader(buffer, ColumnNodeType.QUALIFIER); reader.initOnBlock(blockMeta, bytes); List builderNodes = Lists.newArrayList(); diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowData.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowData.java index c5e3a0a2202..ef068023f07 100644 --- a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowData.java +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowData.java @@ -32,10 +32,12 @@ 
import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNub; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataNumberStrings; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataQualifierByteOrdering; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValues; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataRandomKeyValuesWithTags; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSearcherRowMiss; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSimple; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataSingleQualifier; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivial; +import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataTrivialWithTags; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrls; import org.apache.hadoop.hbase.codec.prefixtree.row.data.TestRowDataUrlsExample; import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; @@ -65,6 +67,7 @@ public interface TestRowData { //simple all.add(new TestRowDataEmpty()); all.add(new TestRowDataTrivial()); + all.add(new TestRowDataTrivialWithTags()); all.add(new TestRowDataSimple()); all.add(new TestRowDataDeeper()); @@ -83,6 +86,7 @@ public interface TestRowData { all.add(new TestRowDataUrlsExample()); all.add(new TestRowDataExerciseFInts()); all.add(new TestRowDataRandomKeyValues()); + all.add(new TestRowDataRandomKeyValuesWithTags()); return all; } diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowEncoder.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowEncoder.java index 2bbba8bfe73..2a6eddc9dc3 100644 --- a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowEncoder.java +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/TestRowEncoder.java @@ -75,6 +75,7 @@ public class TestRowEncoder { @Before public void compile() throws IOException { + // Always run with tags. But should also ensure that KVs without tags work fine os = new ByteArrayOutputStream(1 << 20); encoder = new PrefixTreeEncoder(os, includeMemstoreTS); @@ -92,7 +93,8 @@ public class TestRowEncoder { blockMetaReader = new PrefixTreeBlockMeta(buffer); searcher = new PrefixTreeArraySearcher(blockMetaReader, blockMetaReader.getRowTreeDepth(), - blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength()); + blockMetaReader.getMaxRowLength(), blockMetaReader.getMaxQualifierLength(), + blockMetaReader.getMaxTagsLength()); searcher.initOnBlock(blockMetaReader, outputBytes, includeMemstoreTS); } diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValuesWithTags.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValuesWithTags.java new file mode 100644 index 00000000000..9fac9db29a8 --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataRandomKeyValuesWithTags.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; + +import com.google.common.collect.Lists; +/** + * Generated KVs with tags + */ +public class TestRowDataRandomKeyValuesWithTags extends BaseTestRowData { + static List d = Lists.newArrayList(); + static RedundantKVGenerator generator = new RedundantKVGenerator(); + static { + d = generator.generateTestKeyValues(1 << 10, true); + } + + @Override + public List getInputs() { + return d; + } +} diff --git a/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataTrivialWithTags.java b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataTrivialWithTags.java new file mode 100644 index 00000000000..1c8f7e1258a --- /dev/null +++ b/hbase-prefix-tree/src/test/java/org/apache/hadoop/hbase/codec/prefixtree/row/data/TestRowDataTrivialWithTags.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.codec.prefixtree.row.data; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeBlockMeta; +import org.apache.hadoop.hbase.codec.prefixtree.row.BaseTestRowData; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellScannerPosition; +import org.apache.hadoop.hbase.codec.prefixtree.scanner.CellSearcher; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Assert; + +import com.google.common.collect.Lists; + +public class TestRowDataTrivialWithTags extends BaseTestRowData{ + static byte[] rA = Bytes.toBytes("rA"), rB = Bytes.toBytes("rB"),// turn "r" + // into a + // branch for + // the + // Searcher + // tests + cf = Bytes.toBytes("fam"), cq0 = Bytes.toBytes("q0"), v0 = Bytes.toBytes("v0"); + + static long ts = 55L; + + static List d = Lists.newArrayList(); + static { + List tagList = new ArrayList(); + Tag t = new Tag((byte) 1, "visisbility"); + tagList.add(t); + t = new Tag((byte) 2, "ACL"); + tagList.add(t); + d.add(new KeyValue(rA, cf, cq0, ts, v0, tagList)); + d.add(new KeyValue(rB, cf, cq0, ts, v0, tagList)); + } + + @Override + public List getInputs() { + return d; + } + + @Override + public void individualBlockMetaAssertions(PrefixTreeBlockMeta blockMeta) { + // node[0] -> root[r] + // node[1] -> leaf[A], etc + Assert.assertEquals(2, blockMeta.getRowTreeDepth()); + } + + @Override + public void individualSearcherAssertions(CellSearcher searcher) { + /** + * The searcher should get a token mismatch on the "r" branch. Assert that + * it skips not only rA, but rB as well. + */ + KeyValue afterLast = KeyValue.createFirstOnRow(Bytes.toBytes("zzz")); + CellScannerPosition position = searcher.positionAtOrAfter(afterLast); + Assert.assertEquals(CellScannerPosition.AFTER_LAST, position); + Assert.assertNull(searcher.current()); + } +} diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/CellProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/CellProtos.java index 445b895ff59..f5177dec7e2 100644 --- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/CellProtos.java +++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/CellProtos.java @@ -201,6 +201,16 @@ public final class CellProtos { * optional bytes value = 6; */ com.google.protobuf.ByteString getValue(); + + // optional bytes tags = 7; + /** + * optional bytes tags = 7; + */ + boolean hasTags(); + /** + * optional bytes tags = 7; + */ + com.google.protobuf.ByteString getTags(); } /** * Protobuf type {@code Cell} @@ -294,6 +304,11 @@ public final class CellProtos { value_ = input.readBytes(); break; } + case 58: { + bitField0_ |= 0x00000040; + tags_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -430,6 +445,22 @@ public final class CellProtos { return value_; } + // optional bytes tags = 7; + public static final int TAGS_FIELD_NUMBER = 7; + private com.google.protobuf.ByteString tags_; + /** + * optional bytes tags = 7; + */ + public boolean hasTags() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + /** + * optional bytes tags = 7; + */ + public com.google.protobuf.ByteString getTags() { + return tags_; + } + private void initFields() { row_ = com.google.protobuf.ByteString.EMPTY; family_ = com.google.protobuf.ByteString.EMPTY; @@ -437,6 +468,7 @@ public final class 
CellProtos { timestamp_ = 0L; cellType_ = org.apache.hadoop.hbase.protobuf.generated.CellProtos.CellType.MINIMUM; value_ = com.google.protobuf.ByteString.EMPTY; + tags_ = com.google.protobuf.ByteString.EMPTY; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -468,6 +500,9 @@ public final class CellProtos { if (((bitField0_ & 0x00000020) == 0x00000020)) { output.writeBytes(6, value_); } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + output.writeBytes(7, tags_); + } getUnknownFields().writeTo(output); } @@ -501,6 +536,10 @@ public final class CellProtos { size += com.google.protobuf.CodedOutputStream .computeBytesSize(6, value_); } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(7, tags_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -554,6 +593,11 @@ public final class CellProtos { result = result && getValue() .equals(other.getValue()); } + result = result && (hasTags() == other.hasTags()); + if (hasTags()) { + result = result && getTags() + .equals(other.getTags()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -591,6 +635,10 @@ public final class CellProtos { hash = (37 * hash) + VALUE_FIELD_NUMBER; hash = (53 * hash) + getValue().hashCode(); } + if (hasTags()) { + hash = (37 * hash) + TAGS_FIELD_NUMBER; + hash = (53 * hash) + getTags().hashCode(); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -717,6 +765,8 @@ public final class CellProtos { bitField0_ = (bitField0_ & ~0x00000010); value_ = com.google.protobuf.ByteString.EMPTY; bitField0_ = (bitField0_ & ~0x00000020); + tags_ = com.google.protobuf.ByteString.EMPTY; + bitField0_ = (bitField0_ & ~0x00000040); return this; } @@ -769,6 +819,10 @@ public final class CellProtos { to_bitField0_ |= 0x00000020; } result.value_ = value_; + if (((from_bitField0_ & 0x00000040) == 0x00000040)) { + to_bitField0_ |= 0x00000040; + } + result.tags_ = tags_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -803,6 +857,9 @@ public final class CellProtos { if (other.hasValue()) { setValue(other.getValue()); } + if (other.hasTags()) { + setTags(other.getTags()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -1043,6 +1100,42 @@ public final class CellProtos { return this; } + // optional bytes tags = 7; + private com.google.protobuf.ByteString tags_ = com.google.protobuf.ByteString.EMPTY; + /** + * optional bytes tags = 7; + */ + public boolean hasTags() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + /** + * optional bytes tags = 7; + */ + public com.google.protobuf.ByteString getTags() { + return tags_; + } + /** + * optional bytes tags = 7; + */ + public Builder setTags(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000040; + tags_ = value; + onChanged(); + return this; + } + /** + * optional bytes tags = 7; + */ + public Builder clearTags() { + bitField0_ = (bitField0_ & ~0x00000040); + tags_ = getDefaultInstance().getTags(); + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:Cell) } @@ -1116,6 +1209,16 @@ public final class CellProtos { * optional bytes value = 6; */ com.google.protobuf.ByteString getValue(); + + // optional bytes tags = 7; + /** + * optional bytes tags = 7; + */ + boolean hasTags(); + /** + * optional bytes tags = 7; + */ 
+ com.google.protobuf.ByteString getTags(); } /** * Protobuf type {@code KeyValue} @@ -1210,6 +1313,11 @@ public final class CellProtos { value_ = input.readBytes(); break; } + case 58: { + bitField0_ |= 0x00000040; + tags_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -1346,6 +1454,22 @@ public final class CellProtos { return value_; } + // optional bytes tags = 7; + public static final int TAGS_FIELD_NUMBER = 7; + private com.google.protobuf.ByteString tags_; + /** + * optional bytes tags = 7; + */ + public boolean hasTags() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + /** + * optional bytes tags = 7; + */ + public com.google.protobuf.ByteString getTags() { + return tags_; + } + private void initFields() { row_ = com.google.protobuf.ByteString.EMPTY; family_ = com.google.protobuf.ByteString.EMPTY; @@ -1353,6 +1477,7 @@ public final class CellProtos { timestamp_ = 0L; keyType_ = org.apache.hadoop.hbase.protobuf.generated.CellProtos.CellType.MINIMUM; value_ = com.google.protobuf.ByteString.EMPTY; + tags_ = com.google.protobuf.ByteString.EMPTY; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -1396,6 +1521,9 @@ public final class CellProtos { if (((bitField0_ & 0x00000020) == 0x00000020)) { output.writeBytes(6, value_); } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + output.writeBytes(7, tags_); + } getUnknownFields().writeTo(output); } @@ -1429,6 +1557,10 @@ public final class CellProtos { size += com.google.protobuf.CodedOutputStream .computeBytesSize(6, value_); } + if (((bitField0_ & 0x00000040) == 0x00000040)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(7, tags_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -1482,6 +1614,11 @@ public final class CellProtos { result = result && getValue() .equals(other.getValue()); } + result = result && (hasTags() == other.hasTags()); + if (hasTags()) { + result = result && getTags() + .equals(other.getTags()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -1519,6 +1656,10 @@ public final class CellProtos { hash = (37 * hash) + VALUE_FIELD_NUMBER; hash = (53 * hash) + getValue().hashCode(); } + if (hasTags()) { + hash = (37 * hash) + TAGS_FIELD_NUMBER; + hash = (53 * hash) + getTags().hashCode(); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -1646,6 +1787,8 @@ public final class CellProtos { bitField0_ = (bitField0_ & ~0x00000010); value_ = com.google.protobuf.ByteString.EMPTY; bitField0_ = (bitField0_ & ~0x00000020); + tags_ = com.google.protobuf.ByteString.EMPTY; + bitField0_ = (bitField0_ & ~0x00000040); return this; } @@ -1698,6 +1841,10 @@ public final class CellProtos { to_bitField0_ |= 0x00000020; } result.value_ = value_; + if (((from_bitField0_ & 0x00000040) == 0x00000040)) { + to_bitField0_ |= 0x00000040; + } + result.tags_ = tags_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -1732,6 +1879,9 @@ public final class CellProtos { if (other.hasValue()) { setValue(other.getValue()); } + if (other.hasTags()) { + setTags(other.getTags()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -1984,6 +2134,42 @@ public final class CellProtos { return this; } + // optional bytes tags = 7; + private com.google.protobuf.ByteString tags_ = com.google.protobuf.ByteString.EMPTY; + /** + * optional bytes tags = 7; + */ + public 
boolean hasTags() { + return ((bitField0_ & 0x00000040) == 0x00000040); + } + /** + * optional bytes tags = 7; + */ + public com.google.protobuf.ByteString getTags() { + return tags_; + } + /** + * optional bytes tags = 7; + */ + public Builder setTags(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000040; + tags_ = value; + onChanged(); + return this; + } + /** + * optional bytes tags = 7; + */ + public Builder clearTags() { + bitField0_ = (bitField0_ & ~0x00000040); + tags_ = getDefaultInstance().getTags(); + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:KeyValue) } @@ -2014,17 +2200,18 @@ public final class CellProtos { descriptor; static { java.lang.String[] descriptorData = { - "\n\nCell.proto\"v\n\004Cell\022\013\n\003row\030\001 \001(\014\022\016\n\006fam" + - "ily\030\002 \001(\014\022\021\n\tqualifier\030\003 \001(\014\022\021\n\ttimestam" + - "p\030\004 \001(\004\022\034\n\tcell_type\030\005 \001(\0162\t.CellType\022\r\n" + - "\005value\030\006 \001(\014\"y\n\010KeyValue\022\013\n\003row\030\001 \002(\014\022\016\n" + - "\006family\030\002 \002(\014\022\021\n\tqualifier\030\003 \002(\014\022\021\n\ttime" + - "stamp\030\004 \001(\004\022\033\n\010key_type\030\005 \001(\0162\t.CellType" + - "\022\r\n\005value\030\006 \001(\014*`\n\010CellType\022\013\n\007MINIMUM\020\000" + - "\022\007\n\003PUT\020\004\022\n\n\006DELETE\020\010\022\021\n\rDELETE_COLUMN\020\014" + - "\022\021\n\rDELETE_FAMILY\020\016\022\014\n\007MAXIMUM\020\377\001B=\n*org" + - ".apache.hadoop.hbase.protobuf.generatedB", - "\nCellProtosH\001\240\001\001" + "\n\nCell.proto\"\204\001\n\004Cell\022\013\n\003row\030\001 \001(\014\022\016\n\006fa" + + "mily\030\002 \001(\014\022\021\n\tqualifier\030\003 \001(\014\022\021\n\ttimesta" + + "mp\030\004 \001(\004\022\034\n\tcell_type\030\005 \001(\0162\t.CellType\022\r" + + "\n\005value\030\006 \001(\014\022\014\n\004tags\030\007 \001(\014\"\207\001\n\010KeyValue" + + "\022\013\n\003row\030\001 \002(\014\022\016\n\006family\030\002 \002(\014\022\021\n\tqualifi" + + "er\030\003 \002(\014\022\021\n\ttimestamp\030\004 \001(\004\022\033\n\010key_type\030" + + "\005 \001(\0162\t.CellType\022\r\n\005value\030\006 \001(\014\022\014\n\004tags\030" + + "\007 \001(\014*`\n\010CellType\022\013\n\007MINIMUM\020\000\022\007\n\003PUT\020\004\022" + + "\n\n\006DELETE\020\010\022\021\n\rDELETE_COLUMN\020\014\022\021\n\rDELETE" + + "_FAMILY\020\016\022\014\n\007MAXIMUM\020\377\001B=\n*org.apache.ha", + "doop.hbase.protobuf.generatedB\nCellProto" + + "sH\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -2036,13 +2223,13 @@ public final class CellProtos { internal_static_Cell_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_Cell_descriptor, - new java.lang.String[] { "Row", "Family", "Qualifier", "Timestamp", "CellType", "Value", }); + new java.lang.String[] { "Row", "Family", "Qualifier", "Timestamp", "CellType", "Value", "Tags", }); internal_static_KeyValue_descriptor = getDescriptor().getMessageTypes().get(1); internal_static_KeyValue_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_KeyValue_descriptor, - new java.lang.String[] { "Row", "Family", "Qualifier", "Timestamp", "KeyType", "Value", }); + new java.lang.String[] { "Row", "Family", "Qualifier", 
"Timestamp", "KeyType", "Value", "Tags", }); return null; } }; diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java index f256adea2a0..838c35b2777 100644 --- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java +++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java @@ -9288,6 +9288,16 @@ public final class ClientProtos { * optional .MutationProto.DeleteType delete_type = 4; */ org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType getDeleteType(); + + // optional bytes tags = 5; + /** + * optional bytes tags = 5; + */ + boolean hasTags(); + /** + * optional bytes tags = 5; + */ + com.google.protobuf.ByteString getTags(); } /** * Protobuf type {@code MutationProto.ColumnValue.QualifierValue} @@ -9366,6 +9376,11 @@ public final class ClientProtos { } break; } + case 42: { + bitField0_ |= 0x00000010; + tags_ = input.readBytes(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -9470,11 +9485,28 @@ public final class ClientProtos { return deleteType_; } + // optional bytes tags = 5; + public static final int TAGS_FIELD_NUMBER = 5; + private com.google.protobuf.ByteString tags_; + /** + * optional bytes tags = 5; + */ + public boolean hasTags() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + /** + * optional bytes tags = 5; + */ + public com.google.protobuf.ByteString getTags() { + return tags_; + } + private void initFields() { qualifier_ = com.google.protobuf.ByteString.EMPTY; value_ = com.google.protobuf.ByteString.EMPTY; timestamp_ = 0L; deleteType_ = org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType.DELETE_ONE_VERSION; + tags_ = com.google.protobuf.ByteString.EMPTY; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -9500,6 +9532,9 @@ public final class ClientProtos { if (((bitField0_ & 0x00000008) == 0x00000008)) { output.writeEnum(4, deleteType_.getNumber()); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + output.writeBytes(5, tags_); + } getUnknownFields().writeTo(output); } @@ -9525,6 +9560,10 @@ public final class ClientProtos { size += com.google.protobuf.CodedOutputStream .computeEnumSize(4, deleteType_.getNumber()); } + if (((bitField0_ & 0x00000010) == 0x00000010)) { + size += com.google.protobuf.CodedOutputStream + .computeBytesSize(5, tags_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -9568,6 +9607,11 @@ public final class ClientProtos { result = result && (getDeleteType() == other.getDeleteType()); } + result = result && (hasTags() == other.hasTags()); + if (hasTags()) { + result = result && getTags() + .equals(other.getTags()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -9597,6 +9641,10 @@ public final class ClientProtos { hash = (37 * hash) + DELETE_TYPE_FIELD_NUMBER; hash = (53 * hash) + hashEnum(getDeleteType()); } + if (hasTags()) { + hash = (37 * hash) + TAGS_FIELD_NUMBER; + hash = (53 * hash) + getTags().hashCode(); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -9714,6 +9762,8 @@ public final class ClientProtos { bitField0_ = (bitField0_ & ~0x00000004); deleteType_ = 
org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.DeleteType.DELETE_ONE_VERSION; bitField0_ = (bitField0_ & ~0x00000008); + tags_ = com.google.protobuf.ByteString.EMPTY; + bitField0_ = (bitField0_ & ~0x00000010); return this; } @@ -9758,6 +9808,10 @@ public final class ClientProtos { to_bitField0_ |= 0x00000008; } result.deleteType_ = deleteType_; + if (((from_bitField0_ & 0x00000010) == 0x00000010)) { + to_bitField0_ |= 0x00000010; + } + result.tags_ = tags_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -9786,6 +9840,9 @@ public final class ClientProtos { if (other.hasDeleteType()) { setDeleteType(other.getDeleteType()); } + if (other.hasTags()) { + setTags(other.getTags()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -9954,6 +10011,42 @@ public final class ClientProtos { return this; } + // optional bytes tags = 5; + private com.google.protobuf.ByteString tags_ = com.google.protobuf.ByteString.EMPTY; + /** + * optional bytes tags = 5; + */ + public boolean hasTags() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + /** + * optional bytes tags = 5; + */ + public com.google.protobuf.ByteString getTags() { + return tags_; + } + /** + * optional bytes tags = 5; + */ + public Builder setTags(com.google.protobuf.ByteString value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000010; + tags_ = value; + onChanged(); + return this; + } + /** + * optional bytes tags = 5; + */ + public Builder clearTags() { + bitField0_ = (bitField0_ & ~0x00000010); + tags_ = getDefaultInstance().getTags(); + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:MutationProto.ColumnValue.QualifierValue) } @@ -27723,7 +27816,7 @@ public final class ClientProtos { "exists\030\002 \003(\010\"\200\001\n\tCondition\022\013\n\003row\030\001 \002(\014\022", "\016\n\006family\030\002 \002(\014\022\021\n\tqualifier\030\003 \002(\014\022\"\n\014co" + "mpare_type\030\004 \002(\0162\014.CompareType\022\037\n\ncompar" + - "ator\030\005 \002(\0132\013.Comparator\"\227\006\n\rMutationProt" + + "ator\030\005 \002(\0132\013.Comparator\"\246\006\n\rMutationProt" + "o\022\013\n\003row\030\001 \001(\014\0220\n\013mutate_type\030\002 \001(\0162\033.Mu" + "tationProto.MutationType\0220\n\014column_value" + "\030\003 \003(\0132\032.MutationProto.ColumnValue\022\021\n\tti" + @@ -27731,70 +27824,71 @@ public final class ClientProtos { "ytesPair\022:\n\ndurability\030\006 \001(\0162\031.MutationP" + "roto.Durability:\013USE_DEFAULT\022\036\n\ntime_ran" + "ge\030\007 \001(\0132\n.TimeRange\022\035\n\025associated_cell_", - "count\030\010 \001(\005\032\330\001\n\013ColumnValue\022\016\n\006family\030\001 " + + "count\030\010 \001(\005\032\347\001\n\013ColumnValue\022\016\n\006family\030\001 " + "\002(\014\022B\n\017qualifier_value\030\002 \003(\0132).MutationP" + - "roto.ColumnValue.QualifierValue\032u\n\016Quali" + - "fierValue\022\021\n\tqualifier\030\001 \001(\014\022\r\n\005value\030\002 " + - "\001(\014\022\021\n\ttimestamp\030\003 \001(\004\022.\n\013delete_type\030\004 " + - "\001(\0162\031.MutationProto.DeleteType\"W\n\nDurabi" + - "lity\022\017\n\013USE_DEFAULT\020\000\022\014\n\010SKIP_WAL\020\001\022\r\n\tA" + - "SYNC_WAL\020\002\022\014\n\010SYNC_WAL\020\003\022\r\n\tFSYNC_WAL\020\004\"" + - ">\n\014MutationType\022\n\n\006APPEND\020\000\022\r\n\tINCREMENT" + - "\020\001\022\007\n\003PUT\020\002\022\n\n\006DELETE\020\003\"p\n\nDeleteType\022\026\n", - 
"\022DELETE_ONE_VERSION\020\000\022\034\n\030DELETE_MULTIPLE" + - "_VERSIONS\020\001\022\021\n\rDELETE_FAMILY\020\002\022\031\n\025DELETE" + - "_FAMILY_VERSION\020\003\"r\n\rMutateRequest\022 \n\006re" + - "gion\030\001 \002(\0132\020.RegionSpecifier\022 \n\010mutation" + - "\030\002 \002(\0132\016.MutationProto\022\035\n\tcondition\030\003 \001(" + - "\0132\n.Condition\"<\n\016MutateResponse\022\027\n\006resul" + - "t\030\001 \001(\0132\007.Result\022\021\n\tprocessed\030\002 \001(\010\"\344\002\n\004" + - "Scan\022\027\n\006column\030\001 \003(\0132\007.Column\022!\n\tattribu" + - "te\030\002 \003(\0132\016.NameBytesPair\022\021\n\tstart_row\030\003 " + - "\001(\014\022\020\n\010stop_row\030\004 \001(\014\022\027\n\006filter\030\005 \001(\0132\007.", - "Filter\022\036\n\ntime_range\030\006 \001(\0132\n.TimeRange\022\027" + - "\n\014max_versions\030\007 \001(\r:\0011\022\032\n\014cache_blocks\030" + - "\010 \001(\010:\004true\022\022\n\nbatch_size\030\t \001(\r\022\027\n\017max_r" + - "esult_size\030\n \001(\004\022\023\n\013store_limit\030\013 \001(\r\022\024\n" + - "\014store_offset\030\014 \001(\r\022&\n\036load_column_famil" + - "ies_on_demand\030\r \001(\010\022\r\n\005small\030\016 \001(\010\"\236\001\n\013S" + - "canRequest\022 \n\006region\030\001 \001(\0132\020.RegionSpeci" + - "fier\022\023\n\004scan\030\002 \001(\0132\005.Scan\022\022\n\nscanner_id\030" + - "\003 \001(\004\022\026\n\016number_of_rows\030\004 \001(\r\022\025\n\rclose_s" + - "canner\030\005 \001(\010\022\025\n\rnext_call_seq\030\006 \001(\004\"y\n\014S", - "canResponse\022\030\n\020cells_per_result\030\001 \003(\r\022\022\n" + - "\nscanner_id\030\002 \001(\004\022\024\n\014more_results\030\003 \001(\010\022" + - "\013\n\003ttl\030\004 \001(\r\022\030\n\007results\030\005 \003(\0132\007.Result\"\263" + - "\001\n\024BulkLoadHFileRequest\022 \n\006region\030\001 \002(\0132" + - "\020.RegionSpecifier\0225\n\013family_path\030\002 \003(\0132 " + - ".BulkLoadHFileRequest.FamilyPath\022\026\n\016assi" + - "gn_seq_num\030\003 \001(\010\032*\n\nFamilyPath\022\016\n\006family" + - "\030\001 \002(\014\022\014\n\004path\030\002 \002(\t\"\'\n\025BulkLoadHFileRes" + - "ponse\022\016\n\006loaded\030\001 \002(\010\"a\n\026CoprocessorServ" + - "iceCall\022\013\n\003row\030\001 \002(\014\022\024\n\014service_name\030\002 \002", - "(\t\022\023\n\013method_name\030\003 \002(\t\022\017\n\007request\030\004 \002(\014" + - "\"d\n\031CoprocessorServiceRequest\022 \n\006region\030" + - "\001 \002(\0132\020.RegionSpecifier\022%\n\004call\030\002 \002(\0132\027." 
+ - "CoprocessorServiceCall\"]\n\032CoprocessorSer" + - "viceResponse\022 \n\006region\030\001 \002(\0132\020.RegionSpe" + - "cifier\022\035\n\005value\030\002 \002(\0132\016.NameBytesPair\"B\n" + - "\013MultiAction\022 \n\010mutation\030\001 \001(\0132\016.Mutatio" + - "nProto\022\021\n\003get\030\002 \001(\0132\004.Get\"I\n\014ActionResul" + - "t\022\026\n\005value\030\001 \001(\0132\007.Result\022!\n\texception\030\002" + - " \001(\0132\016.NameBytesPair\"^\n\014MultiRequest\022 \n\006", - "region\030\001 \002(\0132\020.RegionSpecifier\022\034\n\006action" + - "\030\002 \003(\0132\014.MultiAction\022\016\n\006atomic\030\003 \001(\010\".\n\r" + - "MultiResponse\022\035\n\006result\030\001 \003(\0132\r.ActionRe" + - "sult2\342\002\n\rClientService\022 \n\003Get\022\013.GetReque" + - "st\032\014.GetResponse\022/\n\010MultiGet\022\020.MultiGetR" + - "equest\032\021.MultiGetResponse\022)\n\006Mutate\022\016.Mu" + - "tateRequest\032\017.MutateResponse\022#\n\004Scan\022\014.S" + - "canRequest\032\r.ScanResponse\022>\n\rBulkLoadHFi" + - "le\022\025.BulkLoadHFileRequest\032\026.BulkLoadHFil" + - "eResponse\022F\n\013ExecService\022\032.CoprocessorSe", - "rviceRequest\032\033.CoprocessorServiceRespons" + - "e\022&\n\005Multi\022\r.MultiRequest\032\016.MultiRespons" + - "eBB\n*org.apache.hadoop.hbase.protobuf.ge" + - "neratedB\014ClientProtosH\001\210\001\001\240\001\001" + "roto.ColumnValue.QualifierValue\032\203\001\n\016Qual" + + "ifierValue\022\021\n\tqualifier\030\001 \001(\014\022\r\n\005value\030\002" + + " \001(\014\022\021\n\ttimestamp\030\003 \001(\004\022.\n\013delete_type\030\004" + + " \001(\0162\031.MutationProto.DeleteType\022\014\n\004tags\030" + + "\005 \001(\014\"W\n\nDurability\022\017\n\013USE_DEFAULT\020\000\022\014\n\010" + + "SKIP_WAL\020\001\022\r\n\tASYNC_WAL\020\002\022\014\n\010SYNC_WAL\020\003\022" + + "\r\n\tFSYNC_WAL\020\004\">\n\014MutationType\022\n\n\006APPEND" + + "\020\000\022\r\n\tINCREMENT\020\001\022\007\n\003PUT\020\002\022\n\n\006DELETE\020\003\"p", + "\n\nDeleteType\022\026\n\022DELETE_ONE_VERSION\020\000\022\034\n\030" + + "DELETE_MULTIPLE_VERSIONS\020\001\022\021\n\rDELETE_FAM" + + "ILY\020\002\022\031\n\025DELETE_FAMILY_VERSION\020\003\"r\n\rMuta" + + "teRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpecif" + + "ier\022 \n\010mutation\030\002 \002(\0132\016.MutationProto\022\035\n" + + "\tcondition\030\003 \001(\0132\n.Condition\"<\n\016MutateRe" + + "sponse\022\027\n\006result\030\001 \001(\0132\007.Result\022\021\n\tproce" + + "ssed\030\002 \001(\010\"\344\002\n\004Scan\022\027\n\006column\030\001 \003(\0132\007.Co" + + "lumn\022!\n\tattribute\030\002 \003(\0132\016.NameBytesPair\022" + + "\021\n\tstart_row\030\003 \001(\014\022\020\n\010stop_row\030\004 \001(\014\022\027\n\006", + "filter\030\005 \001(\0132\007.Filter\022\036\n\ntime_range\030\006 \001(" + + "\0132\n.TimeRange\022\027\n\014max_versions\030\007 \001(\r:\0011\022\032" + + "\n\014cache_blocks\030\010 \001(\010:\004true\022\022\n\nbatch_size" + + "\030\t \001(\r\022\027\n\017max_result_size\030\n \001(\004\022\023\n\013store" + + "_limit\030\013 \001(\r\022\024\n\014store_offset\030\014 \001(\r\022&\n\036lo" + + "ad_column_families_on_demand\030\r \001(\010\022\r\n\005sm" + + "all\030\016 \001(\010\"\236\001\n\013ScanRequest\022 \n\006region\030\001 \001(" + + "\0132\020.RegionSpecifier\022\023\n\004scan\030\002 \001(\0132\005.Scan" + + "\022\022\n\nscanner_id\030\003 \001(\004\022\026\n\016number_of_rows\030\004" + + " \001(\r\022\025\n\rclose_scanner\030\005 
\001(\010\022\025\n\rnext_call", + "_seq\030\006 \001(\004\"y\n\014ScanResponse\022\030\n\020cells_per_" + + "result\030\001 \003(\r\022\022\n\nscanner_id\030\002 \001(\004\022\024\n\014more" + + "_results\030\003 \001(\010\022\013\n\003ttl\030\004 \001(\r\022\030\n\007results\030\005" + + " \003(\0132\007.Result\"\263\001\n\024BulkLoadHFileRequest\022 " + + "\n\006region\030\001 \002(\0132\020.RegionSpecifier\0225\n\013fami" + + "ly_path\030\002 \003(\0132 .BulkLoadHFileRequest.Fam" + + "ilyPath\022\026\n\016assign_seq_num\030\003 \001(\010\032*\n\nFamil" + + "yPath\022\016\n\006family\030\001 \002(\014\022\014\n\004path\030\002 \002(\t\"\'\n\025B" + + "ulkLoadHFileResponse\022\016\n\006loaded\030\001 \002(\010\"a\n\026" + + "CoprocessorServiceCall\022\013\n\003row\030\001 \002(\014\022\024\n\014s", + "ervice_name\030\002 \002(\t\022\023\n\013method_name\030\003 \002(\t\022\017" + + "\n\007request\030\004 \002(\014\"d\n\031CoprocessorServiceReq" + + "uest\022 \n\006region\030\001 \002(\0132\020.RegionSpecifier\022%" + + "\n\004call\030\002 \002(\0132\027.CoprocessorServiceCall\"]\n" + + "\032CoprocessorServiceResponse\022 \n\006region\030\001 " + + "\002(\0132\020.RegionSpecifier\022\035\n\005value\030\002 \002(\0132\016.N" + + "ameBytesPair\"B\n\013MultiAction\022 \n\010mutation\030" + + "\001 \001(\0132\016.MutationProto\022\021\n\003get\030\002 \001(\0132\004.Get" + + "\"I\n\014ActionResult\022\026\n\005value\030\001 \001(\0132\007.Result" + + "\022!\n\texception\030\002 \001(\0132\016.NameBytesPair\"^\n\014M", + "ultiRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpec" + + "ifier\022\034\n\006action\030\002 \003(\0132\014.MultiAction\022\016\n\006a" + + "tomic\030\003 \001(\010\".\n\rMultiResponse\022\035\n\006result\030\001" + + " \003(\0132\r.ActionResult2\342\002\n\rClientService\022 \n" + + "\003Get\022\013.GetRequest\032\014.GetResponse\022/\n\010Multi" + + "Get\022\020.MultiGetRequest\032\021.MultiGetResponse" + + "\022)\n\006Mutate\022\016.MutateRequest\032\017.MutateRespo" + + "nse\022#\n\004Scan\022\014.ScanRequest\032\r.ScanResponse" + + "\022>\n\rBulkLoadHFile\022\025.BulkLoadHFileRequest" + + "\032\026.BulkLoadHFileResponse\022F\n\013ExecService\022", + "\032.CoprocessorServiceRequest\032\033.Coprocesso" + + "rServiceResponse\022&\n\005Multi\022\r.MultiRequest" + + "\032\016.MultiResponseBB\n*org.apache.hadoop.hb" + + "ase.protobuf.generatedB\014ClientProtosH\001\210\001" + + "\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -27866,7 +27960,7 @@ public final class ClientProtos { internal_static_MutationProto_ColumnValue_QualifierValue_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_MutationProto_ColumnValue_QualifierValue_descriptor, - new java.lang.String[] { "Qualifier", "Value", "Timestamp", "DeleteType", }); + new java.lang.String[] { "Qualifier", "Value", "Timestamp", "DeleteType", "Tags", }); internal_static_MutateRequest_descriptor = getDescriptor().getMessageTypes().get(9); internal_static_MutateRequest_fieldAccessorTable = new diff --git a/hbase-protocol/src/main/protobuf/Cell.proto b/hbase-protocol/src/main/protobuf/Cell.proto index 29e187461cf..98deb2cfbd9 100644 --- a/hbase-protocol/src/main/protobuf/Cell.proto +++ b/hbase-protocol/src/main/protobuf/Cell.proto @@ -48,6 +48,7 @@ message Cell { optional uint64 timestamp = 4; optional CellType cell_type = 5; optional 
bytes value = 6; + optional bytes tags = 7; } /** @@ -61,4 +62,5 @@ message KeyValue { optional uint64 timestamp = 4; optional CellType key_type = 5; optional bytes value = 6; + optional bytes tags = 7; } diff --git a/hbase-protocol/src/main/protobuf/Client.proto b/hbase-protocol/src/main/protobuf/Client.proto index 462b7df0232..3629504fd87 100644 --- a/hbase-protocol/src/main/protobuf/Client.proto +++ b/hbase-protocol/src/main/protobuf/Client.proto @@ -187,6 +187,7 @@ message MutationProto { optional bytes value = 2; optional uint64 timestamp = 3; optional DeleteType delete_type = 4; + optional bytes tags = 5; } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java index b54b26c8477..6c337bb532c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileReader.java @@ -329,4 +329,6 @@ public abstract class AbstractHFileReader implements HFile.Reader { public DataBlockEncoding getEncodingOnDisk() { return dataBlockEncoder.getEncodingOnDisk(); } + + public abstract int getMajorVersion(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java index ea5f0c0c3a0..d3e89f15c9e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/AbstractHFileWriter.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; @@ -61,9 +62,6 @@ public abstract class AbstractHFileWriter implements HFile.Writer { /** A "file info" block: a key-value map of file-wide metadata. */ protected FileInfo fileInfo = new HFile.FileInfo(); - /** Number of uncompressed bytes we allow per block. */ - protected final int blockSize; - /** Total # of key/value entries, i.e. how many times add() was called. */ protected long entryCount = 0; @@ -85,15 +83,6 @@ public abstract class AbstractHFileWriter implements HFile.Writer { /** {@link Writable}s representing meta block data. */ protected List metaData = new ArrayList(); - /** The compression algorithm used. NONE if no compression. */ - protected final Compression.Algorithm compressAlgo; - - /** - * The data block encoding which will be used. - * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding. - */ - protected final HFileDataBlockEncoder blockEncoder; - /** First key in a block. */ protected byte[] firstKeyInBlock = null; @@ -110,19 +99,28 @@ public abstract class AbstractHFileWriter implements HFile.Writer { */ protected final String name; + /** + * The data block encoding which will be used. + * {@link NoOpDataBlockEncoder#INSTANCE} if there is no encoding. 
+ */ + protected final HFileDataBlockEncoder blockEncoder; + + protected final HFileContext hFileContext; + public AbstractHFileWriter(CacheConfig cacheConf, - FSDataOutputStream outputStream, Path path, int blockSize, - Compression.Algorithm compressAlgo, - HFileDataBlockEncoder dataBlockEncoder, - KVComparator comparator) { + FSDataOutputStream outputStream, Path path, + KVComparator comparator, HFileContext fileContext) { this.outputStream = outputStream; this.path = path; this.name = path != null ? path.getName() : outputStream.toString(); - this.blockSize = blockSize; - this.compressAlgo = compressAlgo == null - ? HFile.DEFAULT_COMPRESSION_ALGORITHM : compressAlgo; - this.blockEncoder = dataBlockEncoder != null - ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE; + this.hFileContext = fileContext; + if (hFileContext.getEncodingOnDisk() != DataBlockEncoding.NONE + || hFileContext.getEncodingInCache() != DataBlockEncoding.NONE) { + this.blockEncoder = new HFileDataBlockEncoderImpl(hFileContext.getEncodingOnDisk(), + hFileContext.getEncodingInCache()); + } else { + this.blockEncoder = NoOpDataBlockEncoder.INSTANCE; + } this.comparator = comparator != null ? comparator : KeyValue.COMPARATOR; @@ -234,7 +232,7 @@ public abstract class AbstractHFileWriter implements HFile.Writer { @Override public String toString() { return "writer=" + (path != null ? path.toString() : null) + ", name=" - + name + ", compression=" + compressAlgo.getName(); + + name + ", compression=" + hFileContext.getCompression().getName(); } /** @@ -245,7 +243,7 @@ public abstract class AbstractHFileWriter implements HFile.Writer { trailer.setMetaIndexCount(metaNames.size()); trailer.setTotalUncompressedBytes(totalUncompressedBytes+ trailer.getTrailerSize()); trailer.setEntryCount(entryCount); - trailer.setCompressionCodec(compressAlgo); + trailer.setCompressionCodec(hFileContext.getCompression()); trailer.serialize(outputStream); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java index 4fc3576240b..8938362f7ac 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/ChecksumUtil.java @@ -23,10 +23,7 @@ import java.nio.ByteBuffer; import java.util.zip.Checksum; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hbase.util.ChecksumFactory; import org.apache.hadoop.hbase.util.ChecksumType; /** @@ -107,7 +104,7 @@ public class ChecksumUtil { // when the minorVersion is 0, thus this is a defensive check for a // cannot-happen case. Since this is a cannot-happen case, it is // better to return false to indicate a checksum validation failure. 
- if (block.getMinorVersion() < HFileBlock.MINOR_VERSION_WITH_CHECKSUM) { + if (!block.getHFileContext().shouldUseHBaseChecksum()) { return false; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java index 38abbe30a2d..12c8359809e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/FixedFileTrailer.java @@ -54,11 +54,6 @@ import com.google.common.io.NullOutputStream; @InterfaceAudience.Private public class FixedFileTrailer { - private static final Log LOG = LogFactory.getLog(FixedFileTrailer.class); - - /** HFile minor version that introduced pbuf filetrailer */ - private static final int PBUF_TRAILER_MINOR_VERSION = 2; - /** * We store the comparator class name as a fixed-length field in the trailer. */ @@ -131,18 +126,13 @@ public class FixedFileTrailer { private static int[] computeTrailerSizeByVersion() { int versionToSize[] = new int[HFile.MAX_FORMAT_VERSION + 1]; - for (int version = HFile.MIN_FORMAT_VERSION; - version <= HFile.MAX_FORMAT_VERSION; - ++version) { - FixedFileTrailer fft = new FixedFileTrailer(version, HFileBlock.MINOR_VERSION_NO_CHECKSUM); - DataOutputStream dos = new DataOutputStream(new NullOutputStream()); - try { - fft.serialize(dos); - } catch (IOException ex) { - // The above has no reason to fail. - throw new RuntimeException(ex); - } - versionToSize[version] = dos.size(); + // We support only 2 major versions now. ie. V2, V3 + versionToSize[2] = 212; + for (int version = 3; version <= HFile.MAX_FORMAT_VERSION; version++) { + // Max FFT size for V3 and above is taken as 1KB for future enhancements + // if any. + // Unless the trailer size exceeds 1024 this can continue + versionToSize[version] = 1024; } return versionToSize; } @@ -184,11 +174,7 @@ public class FixedFileTrailer { DataOutputStream baosDos = new DataOutputStream(baos); BlockType.TRAILER.write(baosDos); - if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) { - serializeAsPB(baosDos); - } else { - serializeAsWritable(baosDos); - } + serializeAsPB(baosDos); // The last 4 bytes of the file encode the major and minor version universally baosDos.writeInt(materializeVersion(majorVersion, minorVersion)); @@ -233,29 +219,6 @@ public class FixedFileTrailer { } } - /** - * Write trailer data as writable - * @param outputStream - * @throws IOException - */ - void serializeAsWritable(DataOutputStream output) throws IOException { - output.writeLong(fileInfoOffset); - output.writeLong(loadOnOpenDataOffset); - output.writeInt(dataIndexCount); - - output.writeLong(uncompressedDataIndexSize); - - output.writeInt(metaIndexCount); - output.writeLong(totalUncompressedBytes); - output.writeLong(entryCount); - output.writeInt(compressionCodec.ordinal()); - - output.writeInt(numDataIndexLevels); - output.writeLong(firstDataBlockOffset); - output.writeLong(lastDataBlockOffset); - Bytes.writeStringFixedSize(output, comparatorClassName, MAX_COMPARATOR_NAME_LENGTH); - } - /** * Deserialize the fixed file trailer from the given stream. The version needs * to already be specified. 
Make sure this is consistent with @@ -269,7 +232,8 @@ public class FixedFileTrailer { BlockType.TRAILER.readAndCheck(inputStream); - if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) { + if (majorVersion > 2 + || (majorVersion == 2 && minorVersion >= HFileReaderV2.PBUF_TRAILER_MINOR_VERSION)) { deserializeFromPB(inputStream); } else { deserializeFromWritable(inputStream); @@ -655,7 +619,7 @@ public class FixedFileTrailer { * Create a 4 byte serialized version number by combining the * minor and major version numbers. */ - private static int materializeVersion(int majorVersion, int minorVersion) { + static int materializeVersion(int majorVersion, int minorVersion) { return ((majorVersion & 0x00ffffff) | (minorVersion << 24)); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index 85244ce6a82..345345ae567 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -50,23 +50,21 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; -import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.BytesBytesPair; import org.apache.hadoop.hbase.protobuf.generated.HFileProtos; -import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.util.BloomFilterWriter; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ChecksumType; import org.apache.hadoop.hbase.util.FSUtils; -import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Writable; import com.google.common.base.Preconditions; @@ -156,7 +154,7 @@ public class HFile { /** Maximum supported HFile format version */ - public static final int MAX_FORMAT_VERSION = 2; + public static final int MAX_FORMAT_VERSION = 3; /** Default compression name: none. 
*/ public final static String DEFAULT_COMPRESSION = @@ -292,6 +290,8 @@ public class HFile { void append(byte[] key, byte[] value) throws IOException; + void append (byte[] key, byte[] value, byte[] tag) throws IOException; + /** @return the path to this {@link HFile} */ Path getPath(); @@ -332,15 +332,9 @@ public class HFile { protected FileSystem fs; protected Path path; protected FSDataOutputStream ostream; - protected int blockSize = HColumnDescriptor.DEFAULT_BLOCKSIZE; - protected Compression.Algorithm compression = - HFile.DEFAULT_COMPRESSION_ALGORITHM; - protected HFileDataBlockEncoder encoder = NoOpDataBlockEncoder.INSTANCE; protected KVComparator comparator = KeyValue.COMPARATOR; protected InetSocketAddress[] favoredNodes; - protected ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE; - protected int bytesPerChecksum = DEFAULT_BYTES_PER_CHECKSUM; - protected boolean includeMVCCReadpoint = true; + private HFileContext fileContext; WriterFactory(Configuration conf, CacheConfig cacheConf) { this.conf = conf; @@ -361,29 +355,6 @@ public class HFile { return this; } - public WriterFactory withBlockSize(int blockSize) { - this.blockSize = blockSize; - return this; - } - - public WriterFactory withCompression(Compression.Algorithm compression) { - Preconditions.checkNotNull(compression); - this.compression = compression; - return this; - } - - public WriterFactory withCompression(String compressAlgo) { - Preconditions.checkNotNull(compression); - this.compression = AbstractHFileWriter.compressionByName(compressAlgo); - return this; - } - - public WriterFactory withDataBlockEncoder(HFileDataBlockEncoder encoder) { - Preconditions.checkNotNull(encoder); - this.encoder = encoder; - return this; - } - public WriterFactory withComparator(KVComparator comparator) { Preconditions.checkNotNull(comparator); this.comparator = comparator; @@ -396,23 +367,8 @@ public class HFile { return this; } - public WriterFactory withChecksumType(ChecksumType checksumType) { - Preconditions.checkNotNull(checksumType); - this.checksumType = checksumType; - return this; - } - - public WriterFactory withBytesPerChecksum(int bytesPerChecksum) { - this.bytesPerChecksum = bytesPerChecksum; - return this; - } - - /** - * @param includeMVCCReadpoint whether to write the mvcc readpoint to the file for each KV - * @return this (for chained invocation) - */ - public WriterFactory includeMVCCReadpoint(boolean includeMVCCReadpoint) { - this.includeMVCCReadpoint = includeMVCCReadpoint; + public WriterFactory withFileContext(HFileContext fileContext) { + this.fileContext = fileContext; return this; } @@ -424,16 +380,12 @@ public class HFile { if (path != null) { ostream = AbstractHFileWriter.createOutputStream(conf, fs, path, favoredNodes); } - return createWriter(fs, path, ostream, blockSize, - compression, encoder, comparator, checksumType, bytesPerChecksum, includeMVCCReadpoint); + return createWriter(fs, path, ostream, + comparator, fileContext); } - protected abstract Writer createWriter(FileSystem fs, Path path, - FSDataOutputStream ostream, int blockSize, - Compression.Algorithm compress, - HFileDataBlockEncoder dataBlockEncoder, - KVComparator comparator, ChecksumType checksumType, - int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException; + protected abstract Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream, + KVComparator comparator, HFileContext fileContext) throws IOException; } /** The configuration key for HFile version to use for new files */ @@ -466,6 +418,8 @@ 
public class HFile { switch (version) { case 2: return new HFileWriterV2.WriterFactoryV2(conf, cacheConf); + case 3: + return new HFileWriterV3.WriterFactoryV3(conf, cacheConf); default: throw new IllegalArgumentException("Cannot create writer for HFile " + "format version " + version); @@ -573,6 +527,9 @@ public class HFile { case 2: return new HFileReaderV2( path, trailer, fsdis, size, cacheConf, preferredEncodingInCache, hfs); + case 3 : + return new HFileReaderV3( + path, trailer, fsdis, size, cacheConf, preferredEncodingInCache, hfs); default: throw new CorruptHFileException("Invalid HFile version " + trailer.getMajorVersion()); } @@ -589,7 +546,6 @@ public class HFile { public static Reader createReaderWithEncoding( FileSystem fs, Path path, CacheConfig cacheConf, DataBlockEncoding preferredEncodingInCache) throws IOException { - final boolean closeIStream = true; FSDataInputStreamWrapper stream = new FSDataInputStreamWrapper(fs, path); return pickReaderVersion(path, stream, fs.getFileStatus(path).getLen(), cacheConf, preferredEncodingInCache, stream.getHfs()); @@ -648,15 +604,16 @@ public class HFile { } /** - * Metadata for this file. Conjured by the writer. Read in by the reader. + * Metadata for this file. Conjured by the writer. Read in by the reader. */ - static class FileInfo implements SortedMap { + public static class FileInfo implements SortedMap { static final String RESERVED_PREFIX = "hfile."; static final byte[] RESERVED_PREFIX_BYTES = Bytes.toBytes(RESERVED_PREFIX); static final byte [] LASTKEY = Bytes.toBytes(RESERVED_PREFIX + "LASTKEY"); static final byte [] AVG_KEY_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_KEY_LEN"); static final byte [] AVG_VALUE_LEN = Bytes.toBytes(RESERVED_PREFIX + "AVG_VALUE_LEN"); static final byte [] COMPARATOR = Bytes.toBytes(RESERVED_PREFIX + "COMPARATOR"); + public static final byte [] MAX_TAGS_LEN = Bytes.toBytes(RESERVED_PREFIX + "MAX_TAGS_LEN"); private final SortedMap map = new TreeMap(Bytes.BYTES_COMPARATOR); public FileInfo() { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index e3fbb337632..b05666d64e9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hbase.io.hfile; -import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; @@ -35,6 +34,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; @@ -43,7 +43,6 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; -import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ChecksumType; import org.apache.hadoop.hbase.util.ClassSize; @@ -86,12 +85,6 @@ 
import com.google.common.base.Preconditions; @InterfaceAudience.Private public class HFileBlock implements Cacheable { - /** Minor versions starting with this number have hbase checksums */ - static final int MINOR_VERSION_WITH_CHECKSUM = 1; - - /** minor version that does not support checksums */ - static final int MINOR_VERSION_NO_CHECKSUM = 0; - /** * On a checksum failure on a Reader, these many suceeding read * requests switch back to using hdfs checksums before auto-reenabling @@ -115,8 +108,8 @@ public class HFileBlock implements Cacheable { public static final int BYTE_BUFFER_HEAP_SIZE = (int) ClassSize.estimateBase( ByteBuffer.wrap(new byte[0], 0, 0).getClass(), false); - // minorVersion+offset+nextBlockOnDiskSizeWithHeader - public static final int EXTRA_SERIALIZATION_SPACE = 2 * Bytes.SIZEOF_INT + // meta.usesHBaseChecksum+offset+nextBlockOnDiskSizeWithHeader + public static final int EXTRA_SERIALIZATION_SPACE = Bytes.SIZEOF_BYTE + Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG; /** @@ -137,8 +130,8 @@ public class HFileBlock implements Cacheable { } buf.position(buf.limit()); buf.limit(buf.limit() + HFileBlock.EXTRA_SERIALIZATION_SPACE); - int minorVersion=buf.getInt(); - HFileBlock ourBuffer = new HFileBlock(newByteBuffer, minorVersion); + boolean usesChecksum = buf.get() == (byte)1; + HFileBlock ourBuffer = new HFileBlock(newByteBuffer, usesChecksum); ourBuffer.offset = buf.getLong(); ourBuffer.nextBlockOnDiskSizeWithHeader = buf.getInt(); return ourBuffer; @@ -171,23 +164,13 @@ public class HFileBlock implements Cacheable { /** The offset of the previous block on disk */ private final long prevBlockOffset; - /** The Type of checksum, better to store the byte than an object */ - private final byte checksumType; - - /** The number of bytes for which a checksum is computed */ - private final int bytesPerChecksum; - /** Size on disk of header and data. Does not include checksum data */ private final int onDiskDataSizeWithHeader; - /** The minor version of the hfile. */ - private final int minorVersion; - /** The in-memory representation of the hfile block */ private ByteBuffer buf; - - /** Whether there is a memstore timestamp after every key/value */ - private boolean includesMemstoreTS; + /** Meta data that holds meta information on the hfileblock**/ + private HFileContext fileContext; /** * The offset of this block in the file. 
Populated by the reader for @@ -220,17 +203,16 @@ public class HFileBlock implements Cacheable { * @param fillHeader true to fill in the first {@link HConstants#HFILEBLOCK_HEADER_SIZE} bytes of * the buffer based on the header fields provided * @param offset the file offset the block was read from - * @param minorVersion the minor version of this block * @param bytesPerChecksum the number of bytes per checksum chunk * @param checksumType the checksum algorithm to use * @param onDiskDataSizeWithHeader size of header and data on disk not * including checksum data + * @param fileContext HFile meta data */ HFileBlock(BlockType blockType, int onDiskSizeWithoutHeader, int uncompressedSizeWithoutHeader, long prevBlockOffset, ByteBuffer buf, - boolean fillHeader, long offset, boolean includesMemstoreTS, - int minorVersion, int bytesPerChecksum, byte checksumType, - int onDiskDataSizeWithHeader) { + boolean fillHeader, long offset, + int onDiskDataSizeWithHeader, HFileContext fileContext) { this.blockType = blockType; this.onDiskSizeWithoutHeader = onDiskSizeWithoutHeader; this.uncompressedSizeWithoutHeader = uncompressedSizeWithoutHeader; @@ -239,11 +221,8 @@ public class HFileBlock implements Cacheable { if (fillHeader) overwriteHeader(); this.offset = offset; - this.includesMemstoreTS = includesMemstoreTS; - this.minorVersion = minorVersion; - this.bytesPerChecksum = bytesPerChecksum; - this.checksumType = checksumType; this.onDiskDataSizeWithHeader = onDiskDataSizeWithHeader; + this.fileContext = fileContext; } /** @@ -254,20 +233,21 @@ public class HFileBlock implements Cacheable { * because majorNumbers indicate the format of a HFile whereas minorNumbers * indicate the format inside a HFileBlock. */ - HFileBlock(ByteBuffer b, int minorVersion) throws IOException { + HFileBlock(ByteBuffer b, boolean usesHBaseChecksum) throws IOException { b.rewind(); blockType = BlockType.read(b); onDiskSizeWithoutHeader = b.getInt(); uncompressedSizeWithoutHeader = b.getInt(); prevBlockOffset = b.getLong(); - this.minorVersion = minorVersion; - if (minorVersion >= MINOR_VERSION_WITH_CHECKSUM) { - this.checksumType = b.get(); - this.bytesPerChecksum = b.getInt(); + this.fileContext = new HFileContext(); + this.fileContext.setUsesHBaseChecksum(usesHBaseChecksum); + if (usesHBaseChecksum) { + this.fileContext.setChecksumType(ChecksumType.codeToType(b.get())); + this.fileContext.setBytesPerChecksum(b.getInt()); this.onDiskDataSizeWithHeader = b.getInt(); } else { - this.checksumType = ChecksumType.NULL.getCode(); - this.bytesPerChecksum = 0; + this.fileContext.setChecksumType(ChecksumType.NULL); + this.fileContext.setBytesPerChecksum(0); this.onDiskDataSizeWithHeader = onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; } @@ -417,9 +397,9 @@ public class HFileBlock implements Cacheable { "uncompressedSizeWithoutHeader"); sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlocKOffset"); - if (minorVersion >= MINOR_VERSION_WITH_CHECKSUM) { - sanityCheckAssertion(buf.get(), checksumType, "checksumType"); - sanityCheckAssertion(buf.getInt(), bytesPerChecksum, "bytesPerChecksum"); + if (this.fileContext.shouldUseHBaseChecksum()) { + sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType"); + sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), "bytesPerChecksum"); sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader, "onDiskDataSizeWithHeader"); } @@ -540,17 +520,15 @@ public class HFileBlock implements Cacheable { public 
long heapSize() { long size = ClassSize.align( ClassSize.OBJECT + - // Block type and byte buffer references - 2 * ClassSize.REFERENCE + + // Block type, byte buffer and meta references + 3 * ClassSize.REFERENCE + // On-disk size, uncompressed size, and next block's on-disk size - // bytePerChecksum, onDiskDataSize and minorVersion - 6 * Bytes.SIZEOF_INT + - // Checksum type - 1 * Bytes.SIZEOF_BYTE + + // bytePerChecksum and onDiskDataSize + 4 * Bytes.SIZEOF_INT + // This and previous block offset 2 * Bytes.SIZEOF_LONG + - // "Include memstore timestamp" flag - Bytes.SIZEOF_BOOLEAN + // Heap size of the meta object. meta will be always not null. + fileContext.heapSize() ); if (buf != null) { @@ -698,35 +676,24 @@ public class HFileBlock implements Cacheable { /** The offset of the previous block of the same type */ private long prevOffset; - - /** Whether we are including memstore timestamp after every key/value */ - private boolean includesMemstoreTS; - - /** Checksum settings */ - private ChecksumType checksumType; - private int bytesPerChecksum; + /** Meta data that holds information about the hfileblock**/ + private HFileContext fileContext; /** - * @param compressionAlgorithm compression algorithm to use * @param dataBlockEncoder data block encoding algorithm to use - * @param checksumType type of checksum - * @param bytesPerChecksum bytes per checksum */ - public Writer(Compression.Algorithm compressionAlgorithm, - HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS, - ChecksumType checksumType, int bytesPerChecksum) { + public Writer(HFileDataBlockEncoder dataBlockEncoder, HFileContext fileContext) { this.dataBlockEncoder = dataBlockEncoder != null ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE; - defaultBlockEncodingCtx = - new HFileBlockDefaultEncodingContext(compressionAlgorithm, null, HConstants.HFILEBLOCK_DUMMY_HEADER); - dataBlockEncodingCtx = - this.dataBlockEncoder.newOnDiskDataBlockEncodingContext( - compressionAlgorithm, HConstants.HFILEBLOCK_DUMMY_HEADER); + defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null, + HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext); + dataBlockEncodingCtx = this.dataBlockEncoder + .newOnDiskDataBlockEncodingContext(HConstants.HFILEBLOCK_DUMMY_HEADER, fileContext); - if (bytesPerChecksum < HConstants.HFILEBLOCK_HEADER_SIZE) { + if (fileContext.getBytesPerChecksum() < HConstants.HFILEBLOCK_HEADER_SIZE) { throw new RuntimeException("Unsupported value of bytesPerChecksum. 
" + " Minimum is " + HConstants.HFILEBLOCK_HEADER_SIZE + " but the configured value is " + - bytesPerChecksum); + fileContext.getBytesPerChecksum()); } baosInMemory = new ByteArrayOutputStream(); @@ -735,9 +702,7 @@ public class HFileBlock implements Cacheable { for (int i = 0; i < prevOffsetByType.length; ++i) prevOffsetByType[i] = -1; - this.includesMemstoreTS = includesMemstoreTS; - this.checksumType = checksumType; - this.bytesPerChecksum = bytesPerChecksum; + this.fileContext = fileContext; } /** @@ -821,7 +786,7 @@ public class HFileBlock implements Cacheable { int numBytes = (int) ChecksumUtil.numBytes( onDiskBytesWithHeader.length, - bytesPerChecksum); + fileContext.getBytesPerChecksum()); // put the header for on disk bytes putHeader(onDiskBytesWithHeader, 0, @@ -835,7 +800,7 @@ public class HFileBlock implements Cacheable { onDiskChecksum = new byte[numBytes]; ChecksumUtil.generateChecksums( onDiskBytesWithHeader, 0, onDiskBytesWithHeader.length, - onDiskChecksum, 0, checksumType, bytesPerChecksum); + onDiskChecksum, 0, fileContext.getChecksumType(), fileContext.getBytesPerChecksum()); } /** @@ -848,9 +813,8 @@ public class HFileBlock implements Cacheable { ByteBuffer.wrap(uncompressedBytesWithHeader, HConstants.HFILEBLOCK_HEADER_SIZE, uncompressedBytesWithHeader.length - HConstants.HFILEBLOCK_HEADER_SIZE).slice(); - //do the encoding - dataBlockEncoder.beforeWriteToDisk(rawKeyValues, - includesMemstoreTS, dataBlockEncodingCtx, blockType); + // do the encoding + dataBlockEncoder.beforeWriteToDisk(rawKeyValues, dataBlockEncodingCtx, blockType); uncompressedBytesWithHeader = dataBlockEncodingCtx.getUncompressedBytesWithHeader(); @@ -873,8 +837,8 @@ public class HFileBlock implements Cacheable { offset = Bytes.putInt(dest, offset, onDiskSize - HConstants.HFILEBLOCK_HEADER_SIZE); offset = Bytes.putInt(dest, offset, uncompressedSize - HConstants.HFILEBLOCK_HEADER_SIZE); offset = Bytes.putLong(dest, offset, prevOffset); - offset = Bytes.putByte(dest, offset, checksumType.getCode()); - offset = Bytes.putInt(dest, offset, bytesPerChecksum); + offset = Bytes.putByte(dest, offset, fileContext.getChecksumType().getCode()); + offset = Bytes.putInt(dest, offset, fileContext.getBytesPerChecksum()); Bytes.putInt(dest, offset, onDiskDataSize); } @@ -1055,12 +1019,13 @@ public class HFileBlock implements Cacheable { * 0 value in bytesPerChecksum. */ public HFileBlock getBlockForCaching() { + HFileContext newContext = fileContext.clone(); + newContext.setBytesPerChecksum(0); + newContext.setChecksumType(ChecksumType.NULL); // no checksums in cached data return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(), - getUncompressedSizeWithoutHeader(), prevOffset, - getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset, - includesMemstoreTS, MINOR_VERSION_WITH_CHECKSUM, - 0, ChecksumType.NULL.getCode(), // no checksums in cached data - onDiskBytesWithHeader.length + onDiskChecksum.length); + getUncompressedSizeWithoutHeader(), prevOffset, getUncompressedBufferWithHeader(), + DONT_FILL_HEADER, startOffset, + onDiskBytesWithHeader.length + onDiskChecksum.length, newContext); } } @@ -1134,14 +1099,10 @@ public class HFileBlock implements Cacheable { */ private abstract static class AbstractFSReader implements FSReader { /** Compression algorithm used by the {@link HFile} */ - protected Compression.Algorithm compressAlgo; /** The size of the file we are reading from, or -1 if unknown. 
*/ protected long fileSize; - /** The minor version of this reader */ - private int minorVersion; - /** The size of the header */ protected final int hdrSize; @@ -1156,14 +1117,15 @@ public class HFileBlock implements Cacheable { /** The default buffer size for our buffered streams */ public static final int DEFAULT_BUFFER_SIZE = 1 << 20; - public AbstractFSReader(Algorithm compressAlgo, long fileSize, int minorVersion, - HFileSystem hfs, Path path) throws IOException { - this.compressAlgo = compressAlgo; + protected HFileContext fileContext; + + public AbstractFSReader(long fileSize, HFileSystem hfs, Path path, HFileContext fileContext) + throws IOException { this.fileSize = fileSize; - this.minorVersion = minorVersion; this.hfs = hfs; this.path = path; - this.hdrSize = headerSize(minorVersion); + this.fileContext = fileContext; + this.hdrSize = headerSize(fileContext.shouldUseHBaseChecksum()); } @Override @@ -1266,12 +1228,6 @@ public class HFileBlock implements Cacheable { hdrSize; } - /** - * @return The minorVersion of this HFile - */ - protected int getMinorVersion() { - return minorVersion; - } } /** @@ -1290,9 +1246,6 @@ public class HFileBlock implements Cacheable { * does or doesn't do checksum validations in the filesystem */ protected FSDataInputStreamWrapper streamWrapper; - /** Whether we include memstore timestamp in data blocks */ - protected boolean includesMemstoreTS; - /** Data block encoding used to read from file */ protected HFileDataBlockEncoder dataBlockEncoder = NoOpDataBlockEncoder.INSTANCE; @@ -1309,28 +1262,24 @@ public class HFileBlock implements Cacheable { } }; - public FSReaderV2(FSDataInputStreamWrapper stream, Algorithm compressAlgo, long fileSize, - int minorVersion, HFileSystem hfs, Path path) throws IOException { - super(compressAlgo, fileSize, minorVersion, hfs, path); + public FSReaderV2(FSDataInputStreamWrapper stream, long fileSize, HFileSystem hfs, Path path, + HFileContext fileContext) throws IOException { + super(fileSize, hfs, path, fileContext); this.streamWrapper = stream; // Older versions of HBase didn't support checksum. - boolean forceNoHBaseChecksum = (this.getMinorVersion() < MINOR_VERSION_WITH_CHECKSUM); - this.streamWrapper.prepareForBlockReader(forceNoHBaseChecksum); - + this.streamWrapper.prepareForBlockReader(!fileContext.shouldUseHBaseChecksum()); defaultDecodingCtx = - new HFileBlockDefaultDecodingContext(compressAlgo); + new HFileBlockDefaultDecodingContext(fileContext); encodedBlockDecodingCtx = - new HFileBlockDefaultDecodingContext(compressAlgo); + new HFileBlockDefaultDecodingContext(fileContext); } /** * A constructor that reads files with the latest minor version. * This is used by unit tests only. */ - FSReaderV2(FSDataInputStream istream, Algorithm compressAlgo, - long fileSize) throws IOException { - this(new FSDataInputStreamWrapper(istream), compressAlgo, fileSize, - HFileReaderV2.MAX_MINOR_VERSION, null, null); + FSReaderV2(FSDataInputStream istream, long fileSize, HFileContext fileContext) throws IOException { + this(new FSDataInputStreamWrapper(istream), fileSize, null, null, fileContext); } /** @@ -1490,7 +1439,7 @@ public class HFileBlock implements Cacheable { // from memory if using compression. Here we have already read the // block's header try { - b = new HFileBlock(headerBuf, getMinorVersion()); + b = new HFileBlock(headerBuf, this.fileContext.shouldUseHBaseChecksum()); } catch (IOException ex) { // Seen in load testing. Provide comprehensive debug info. 
throw new IOException("Failed to read compressed block at " @@ -1528,8 +1477,7 @@ public class HFileBlock implements Cacheable { readAtOffset(is, headerBuf.array(), headerBuf.arrayOffset(), hdrSize, false, offset, pread); } - - b = new HFileBlock(headerBuf, getMinorVersion()); + b = new HFileBlock(headerBuf, this.fileContext.shouldUseHBaseChecksum()); onDiskBlock = new byte[b.getOnDiskSizeWithHeader() + hdrSize]; System.arraycopy(headerBuf.array(), headerBuf.arrayOffset(), onDiskBlock, 0, hdrSize); @@ -1538,7 +1486,7 @@ public class HFileBlock implements Cacheable { - hdrSize, true, offset + hdrSize, pread); onDiskSizeWithHeader = b.onDiskSizeWithoutHeader + hdrSize; } - + Algorithm compressAlgo = fileContext.getCompression(); boolean isCompressed = compressAlgo != null && compressAlgo != Compression.Algorithm.NONE; @@ -1576,7 +1524,7 @@ public class HFileBlock implements Cacheable { // contains the header of next block, so no need to set next // block's header in it. b = new HFileBlock(ByteBuffer.wrap(onDiskBlock, 0, - onDiskSizeWithHeader), getMinorVersion()); + onDiskSizeWithHeader), this.fileContext.shouldUseHBaseChecksum()); } b.nextBlockOnDiskSizeWithHeader = nextBlockOnDiskSize; @@ -1588,19 +1536,19 @@ public class HFileBlock implements Cacheable { prefetchedHeader.header, 0, hdrSize); } - b.includesMemstoreTS = includesMemstoreTS; b.offset = offset; + b.fileContext.setIncludesTags(this.fileContext.shouldIncludeTags()); + b.fileContext.setIncludesMvcc(this.fileContext.shouldIncludeMvcc()); return b; } - void setIncludesMemstoreTS(boolean enabled) { - includesMemstoreTS = enabled; + void setIncludesMemstoreTS(boolean includesMemstoreTS) { + this.fileContext.setIncludesMvcc(includesMemstoreTS); } void setDataBlockEncoder(HFileDataBlockEncoder encoder) { this.dataBlockEncoder = encoder; - encodedBlockDecodingCtx = encoder.newOnDiskDataBlockDecodingContext( - this.compressAlgo); + encodedBlockDecodingCtx = encoder.newOnDiskDataBlockDecodingContext(this.fileContext); } /** @@ -1634,14 +1582,11 @@ public class HFileBlock implements Cacheable { ByteBuffer dupBuf = this.buf.duplicate(); dupBuf.rewind(); destination.put(dupBuf); - destination.putInt(this.minorVersion); - destination.putLong(this.offset); - destination.putInt(this.nextBlockOnDiskSizeWithHeader); - destination.rewind(); + serializeExtraInfo(destination); } public void serializeExtraInfo(ByteBuffer destination) { - destination.putInt(this.minorVersion); + destination.put(this.fileContext.shouldUseHBaseChecksum() ? (byte) 1 : (byte) 0); destination.putLong(this.offset); destination.putInt(this.nextBlockOnDiskSizeWithHeader); destination.rewind(); @@ -1696,10 +1641,6 @@ public class HFileBlock implements Cacheable { return true; } - public boolean doesIncludeMemstoreTS() { - return includesMemstoreTS; - } - public DataBlockEncoding getDataBlockEncoding() { if (blockType == BlockType.ENCODED_DATA) { return DataBlockEncoding.getEncodingById(getDataBlockEncodingId()); @@ -1708,21 +1649,17 @@ public class HFileBlock implements Cacheable { } byte getChecksumType() { - return this.checksumType; + return this.fileContext.getChecksumType().getCode(); } int getBytesPerChecksum() { - return this.bytesPerChecksum; + return this.fileContext.getBytesPerChecksum(); } int getOnDiskDataSizeWithHeader() { return this.onDiskDataSizeWithHeader; } - int getMinorVersion() { - return this.minorVersion; - } - /** * Calcuate the number of bytes required to store all the checksums * for this block. Each checksum value is a 4 byte integer. 
@@ -1732,44 +1669,48 @@ public class HFileBlock implements Cacheable { // data to validate. Similarly, a zero value in this.bytesPerChecksum // indicates that cached blocks do not have checksum data because // checksums were already validated when the block was read from disk. - if (minorVersion < MINOR_VERSION_WITH_CHECKSUM || this.bytesPerChecksum == 0) { + if (!fileContext.shouldUseHBaseChecksum() || this.fileContext.getBytesPerChecksum() == 0) { return 0; } - return (int)ChecksumUtil.numBytes(onDiskDataSizeWithHeader, bytesPerChecksum); + return (int)ChecksumUtil.numBytes(onDiskDataSizeWithHeader, this.fileContext.getBytesPerChecksum()); } /** * Returns the size of this block header. */ public int headerSize() { - return headerSize(this.minorVersion); + return headerSize(this.fileContext.shouldUseHBaseChecksum()); } /** * Maps a minor version to the size of the header. */ - public static int headerSize(int minorVersion) { - if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) { - return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; + public static int headerSize(boolean usesHBaseChecksum) { + if (usesHBaseChecksum) { + return HConstants.HFILEBLOCK_HEADER_SIZE; } - return HConstants.HFILEBLOCK_HEADER_SIZE; + return HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; } /** * Return the appropriate DUMMY_HEADER for the minor version */ public byte[] getDummyHeaderForVersion() { - return getDummyHeaderForVersion(minorVersion); + return getDummyHeaderForVersion(this.fileContext.shouldUseHBaseChecksum()); } /** * Return the appropriate DUMMY_HEADER for the minor version */ - static private byte[] getDummyHeaderForVersion(int minorVersion) { - if (minorVersion < MINOR_VERSION_WITH_CHECKSUM) { - return DUMMY_HEADER_NO_CHECKSUM; + static private byte[] getDummyHeaderForVersion(boolean usesHBaseChecksum) { + if (usesHBaseChecksum) { + return HConstants.HFILEBLOCK_DUMMY_HEADER; } - return HConstants.HFILEBLOCK_DUMMY_HEADER; + return DUMMY_HEADER_NO_CHECKSUM; + } + + public HFileContext getHFileContext() { + return this.fileContext; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java index 7d2cb77e3ca..829ffa07312 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoder.java @@ -20,10 +20,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; -import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; +import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; import org.apache.hadoop.hbase.util.Bytes; /** @@ -47,6 +46,7 @@ public interface HFileDataBlockEncoder { * * @param block a block in an on-disk format (read from HFile or freshly * generated). + * @param isCompaction * @return non null block which is coded according to the settings. 
*/ HFileBlock diskToCacheFormat( @@ -63,7 +63,6 @@ public interface HFileDataBlockEncoder { */ void beforeWriteToDisk( ByteBuffer in, - boolean includesMemstoreTS, HFileBlockEncodingContext encodingResult, BlockType blockType ) throws IOException; @@ -100,24 +99,21 @@ public interface HFileDataBlockEncoder { * encoding context should also perform compression if compressionAlgorithm is * valid. * - * @param compressionAlgorithm compression algorithm * @param headerBytes header bytes + * @param fileContext HFile meta data * @return a new {@link HFileBlockEncodingContext} object */ - HFileBlockEncodingContext newOnDiskDataBlockEncodingContext( - Algorithm compressionAlgorithm, byte[] headerBytes - ); + HFileBlockEncodingContext newOnDiskDataBlockEncodingContext(byte[] headerBytes, + HFileContext fileContext); /** * create a encoder specific decoding context for reading. And the * decoding context should also do decompression if compressionAlgorithm * is valid. * - * @param compressionAlgorithm + * @param fileContext - HFile meta data * @return a new {@link HFileBlockDecodingContext} object */ - HFileBlockDecodingContext newOnDiskDataBlockDecodingContext( - Algorithm compressionAlgorithm - ); + HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(HFileContext fileContext); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java index 228c4bb063e..51f4b58d364 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileDataBlockEncoderImpl.java @@ -21,13 +21,12 @@ import java.nio.ByteBuffer; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; -import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.util.Bytes; @@ -156,8 +155,8 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { return block; } // Encode the unencoded block with the in-cache encoding. 
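For reference, a hedged sketch of how a caller obtains on-disk encoding and decoding contexts under the new interface, now that compression, MVCC and tag flags travel inside the HFileContext. The empty dummy header is a placeholder only; real callers pass the block writer's dummy header, and NoOpDataBlockEncoder is used here simply because it needs no on-disk encoding configured:

import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder;

public class EncodingContextSketch {
  static void createContexts() {
    HFileContext meta = new HFileContext();            // carries compression/MVCC/tag settings
    HFileDataBlockEncoder encoder = NoOpDataBlockEncoder.INSTANCE;

    // Placeholder; the block writer normally supplies its own dummy header bytes.
    byte[] dummyHeader = new byte[0];

    HFileBlockEncodingContext encodingCtx =
        encoder.newOnDiskDataBlockEncodingContext(dummyHeader, meta);
    HFileBlockDecodingContext decodingCtx =
        encoder.newOnDiskDataBlockDecodingContext(meta);
    // Neither context is exercised further in this sketch.
  }
}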
- return encodeDataBlock(block, inCache, block.doesIncludeMemstoreTS(), - createInCacheEncodingContext()); + return encodeDataBlock(block, inCache, + createInCacheEncodingContext(block.getHFileContext())); } if (block.getBlockType() == BlockType.ENCODED_DATA) { @@ -183,7 +182,6 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { */ @Override public void beforeWriteToDisk(ByteBuffer in, - boolean includesMemstoreTS, HFileBlockEncodingContext encodeCtx, BlockType blockType) throws IOException { if (onDisk == DataBlockEncoding.NONE) { @@ -192,8 +190,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { in.array(), blockType); return; } - encodeBufferToHFileBlockBuffer(in, onDisk, - includesMemstoreTS, encodeCtx); + encodeBufferToHFileBlockBuffer(in, onDisk, encodeCtx); } @Override @@ -209,15 +206,13 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { * * @param in input data to encode * @param algo encoding algorithm - * @param includesMemstoreTS includes memstore timestamp or not * @param encodeCtx where will the output data be stored */ - private void encodeBufferToHFileBlockBuffer(ByteBuffer in, - DataBlockEncoding algo, boolean includesMemstoreTS, + private void encodeBufferToHFileBlockBuffer(ByteBuffer in, DataBlockEncoding algo, HFileBlockEncodingContext encodeCtx) { DataBlockEncoder encoder = algo.getEncoder(); try { - encoder.encodeKeyValues(in, includesMemstoreTS, encodeCtx); + encoder.encodeKeyValues(in, encodeCtx); } catch (IOException e) { throw new RuntimeException(String.format( "Bug in data block encoder " @@ -227,12 +222,11 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { } } - private HFileBlock encodeDataBlock(HFileBlock block, - DataBlockEncoding algo, boolean includesMemstoreTS, + private HFileBlock encodeDataBlock(HFileBlock block, DataBlockEncoding algo, HFileBlockEncodingContext encodingCtx) { encodingCtx.setDummyHeader(block.getDummyHeaderForVersion()); encodeBufferToHFileBlockBuffer( - block.getBufferWithoutHeader(), algo, includesMemstoreTS, encodingCtx); + block.getBufferWithoutHeader(), algo, encodingCtx); byte[] encodedUncompressedBytes = encodingCtx.getUncompressedBytesWithHeader(); ByteBuffer bufferWrapper = ByteBuffer.wrap(encodedUncompressedBytes); @@ -241,9 +235,7 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { block.getOnDiskSizeWithoutHeader(), sizeWithoutHeader, block.getPrevBlockOffset(), bufferWrapper, HFileBlock.FILL_HEADER, block.getOffset(), - includesMemstoreTS, block.getMinorVersion(), - block.getBytesPerChecksum(), block.getChecksumType(), - block.getOnDiskDataSizeWithHeader()); + block.getOnDiskDataSizeWithHeader(), encodingCtx.getHFileContext()); return encodedBlock; } @@ -253,14 +245,14 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { * See HBASE-8732 * @return a new in cache encoding context */ - private HFileBlockEncodingContext createInCacheEncodingContext() { + private HFileBlockEncodingContext createInCacheEncodingContext(HFileContext meta) { + HFileContext newMeta = meta.clone(); return (inCache != DataBlockEncoding.NONE) ? 
- this.inCache.getEncoder().newDataBlockEncodingContext( - Algorithm.NONE, this.inCache, dummyHeader) - : - // create a default encoding context - new HFileBlockDefaultEncodingContext(Algorithm.NONE, - this.inCache, dummyHeader); + this.inCache.getEncoder().newDataBlockEncodingContext( + this.inCache, dummyHeader, newMeta) + : + // create a default encoding context + new HFileBlockDefaultEncodingContext(this.inCache, dummyHeader, newMeta); } @Override @@ -271,29 +263,25 @@ public class HFileDataBlockEncoderImpl implements HFileDataBlockEncoder { @Override public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext( - Algorithm compressionAlgorithm, byte[] dummyHeader) { + byte[] dummyHeader, HFileContext fileContext) { if (onDisk != null) { DataBlockEncoder encoder = onDisk.getEncoder(); if (encoder != null) { - return encoder.newDataBlockEncodingContext( - compressionAlgorithm, onDisk, dummyHeader); + return encoder.newDataBlockEncodingContext(onDisk, dummyHeader, fileContext); } } - return new HFileBlockDefaultEncodingContext(compressionAlgorithm, - null, dummyHeader); + return new HFileBlockDefaultEncodingContext(null, dummyHeader, fileContext); } @Override - public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext( - Algorithm compressionAlgorithm) { + public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(HFileContext fileContext) { if (onDisk != null) { DataBlockEncoder encoder = onDisk.getEncoder(); if (encoder != null) { - return encoder.newDataBlockDecodingContext( - compressionAlgorithm); + return encoder.newDataBlockDecodingContext(fileContext); } } - return new HFileBlockDefaultDecodingContext(compressionAlgorithm); + return new HFileBlockDefaultDecodingContext(fileContext); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index 0a6eb458168..db2a2543f3c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.regionserver.TimeRangeTracker; import org.apache.hadoop.hbase.util.BloomFilter; @@ -275,6 +276,12 @@ public class HFilePrettyPrinter { System.out.print("K: " + kv); if (printValue) { System.out.print(" V: " + Bytes.toStringBinary(kv.getValue())); + int i = 0; + List tags = kv.getTags(); + for (Tag tag : tags) { + System.out + .print(String.format(" T[%d]: %s", i++, Bytes.toStringBinary(tag.getValue()))); + } } System.out.println(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java index b7bcda741a9..ce73ee5f8cb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java @@ -1,5 +1,4 @@ /* - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. 
See the NOTICE file * distributed with this work for additional information @@ -27,15 +26,16 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; -import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; +import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IdLock; import org.apache.hadoop.io.WritableUtils; @@ -50,21 +50,28 @@ public class HFileReaderV2 extends AbstractHFileReader { private static final Log LOG = LogFactory.getLog(HFileReaderV2.class); + /** Minor versions in HFile V2 starting with this number have hbase checksums */ + public static final int MINOR_VERSION_WITH_CHECKSUM = 1; + /** In HFile V2 minor version that does not support checksums */ + public static final int MINOR_VERSION_NO_CHECKSUM = 0; + + /** HFile minor version that introduced pbuf filetrailer */ + public static final int PBUF_TRAILER_MINOR_VERSION = 2; + /** * The size of a (key length, value length) tuple that prefixes each entry in * a data block. */ - private static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT; + public final static int KEY_VALUE_LEN_SIZE = 2 * Bytes.SIZEOF_INT; - private boolean includesMemstoreTS = false; - private boolean decodeMemstoreTS = false; - - private boolean shouldIncludeMemstoreTS() { + protected boolean includesMemstoreTS = false; + protected boolean decodeMemstoreTS = false; + protected boolean shouldIncludeMemstoreTS() { return includesMemstoreTS; } /** Filesystem-level block reader. */ - private HFileBlock.FSReader fsBlockReader; + protected HFileBlock.FSReader fsBlockReader; /** * A "sparse lock" implementation allowing to lock on a particular block @@ -90,6 +97,7 @@ public class HFileReaderV2 extends AbstractHFileReader { /** Minor versions starting with this number have faked index key */ static final int MINOR_VERSION_WITH_FAKED_KEY = 3; + protected HFileContext hfileContext; /** * Opens a HFile. You must load the index before you can use it by calling @@ -103,16 +111,19 @@ public class HFileReaderV2 extends AbstractHFileReader { * @param preferredEncodingInCache the encoding to use in cache in case we * have a choice. If the file is already encoded on disk, we will * still use its on-disk encoding in cache. 
+ * @param hfs */ public HFileReaderV2(Path path, FixedFileTrailer trailer, final FSDataInputStreamWrapper fsdis, final long size, final CacheConfig cacheConf, DataBlockEncoding preferredEncodingInCache, final HFileSystem hfs) throws IOException { super(path, trailer, size, cacheConf, hfs); - trailer.expectMajorVersion(2); + trailer.expectMajorVersion(getMajorVersion()); validateMinorVersion(path, trailer.getMinorVersion()); - HFileBlock.FSReaderV2 fsBlockReaderV2 = new HFileBlock.FSReaderV2(fsdis, - compressAlgo, fileSize, trailer.getMinorVersion(), hfs, path); + this.hfileContext = createHFileContext(trailer); + // Should we set the preferredEncodinginCache here for the context + HFileBlock.FSReaderV2 fsBlockReaderV2 = new HFileBlock.FSReaderV2(fsdis, fileSize, hfs, path, + hfileContext); this.fsBlockReader = fsBlockReaderV2; // upcast // Comparator class name is stored in the trailer in version 2. @@ -167,6 +178,15 @@ public class HFileReaderV2 extends AbstractHFileReader { } } + protected HFileContext createHFileContext(FixedFileTrailer trailer) { + HFileContext meta = new HFileContext(); + meta.setIncludesMvcc(this.includesMemstoreTS); + meta.setUsesHBaseChecksum( + trailer.getMinorVersion() >= MINOR_VERSION_WITH_CHECKSUM); + meta.setCompressAlgo(this.compressAlgo); + return meta; + } + /** * Create a Scanner on this file. No seeks or reads are done on creation. Call * {@link HFileScanner#seekTo(byte[])} to position an start the read. There is @@ -185,7 +205,7 @@ public class HFileReaderV2 extends AbstractHFileReader { // check if we want to use data block encoding in memory if (dataBlockEncoder.useEncodedScanner(isCompaction)) { return new EncodedScannerV2(this, cacheBlocks, pread, isCompaction, - includesMemstoreTS); + hfileContext); } return new ScannerV2(this, cacheBlocks, pread, isCompaction); @@ -338,7 +358,7 @@ public class HFileReaderV2 extends AbstractHFileReader { long startTimeNs = System.nanoTime(); HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset, onDiskBlockSize, -1, pread); - hfileBlock = dataBlockEncoder.diskToCacheFormat(hfileBlock, isCompaction); + hfileBlock = diskToCacheFormat(hfileBlock, isCompaction); validateBlockType(hfileBlock, expectedBlockType); final long delta = System.nanoTime() - startTimeNs; @@ -363,6 +383,10 @@ public class HFileReaderV2 extends AbstractHFileReader { } } + protected HFileBlock diskToCacheFormat( HFileBlock hfileBlock, final boolean isCompaction) { + return dataBlockEncoder.diskToCacheFormat(hfileBlock, isCompaction); + } + /** * Compares the actual type of a block retrieved from cache or disk with its * expected type and throws an exception in case of a mismatch. 
Expected @@ -612,16 +636,18 @@ public class HFileReaderV2 extends AbstractHFileReader { if (!isSeeked()) return null; - KeyValue ret = new KeyValue(blockBuffer.array(), - blockBuffer.arrayOffset() + blockBuffer.position(), - KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen, - currKeyLen); + KeyValue ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position(), getKvBufSize(), currKeyLen); if (this.reader.shouldIncludeMemstoreTS()) { ret.setMvccVersion(currMemstoreTS); } return ret; } + protected int getKvBufSize() { + return KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen; + } + @Override public ByteBuffer getKey() { assertSeeked(); @@ -640,7 +666,7 @@ public class HFileReaderV2 extends AbstractHFileReader { + KEY_VALUE_LEN_SIZE + currKeyLen, currValueLen).slice(); } - private void setNonSeekedState() { + protected void setNonSeekedState() { block = null; blockBuffer = null; currKeyLen = 0; @@ -661,8 +687,7 @@ public class HFileReaderV2 extends AbstractHFileReader { assertSeeked(); try { - blockBuffer.position(blockBuffer.position() + KEY_VALUE_LEN_SIZE - + currKeyLen + currValueLen + currMemstoreTSLen); + blockBuffer.position(getNextKVStartPosition()); } catch (IllegalArgumentException e) { LOG.error("Current pos = " + blockBuffer.position() + "; currKeyLen = " + currKeyLen + "; currValLen = " @@ -697,6 +722,11 @@ public class HFileReaderV2 extends AbstractHFileReader { return true; } + protected int getNextKVStartPosition() { + return blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + + currMemstoreTSLen; + } + /** * Positions this scanner at the start of the file. * @@ -753,7 +783,7 @@ public class HFileReaderV2 extends AbstractHFileReader { * * @param newBlock the block to make current */ - private void updateCurrBlock(HFileBlock newBlock) { + protected void updateCurrBlock(HFileBlock newBlock) { block = newBlock; // sanity check @@ -773,19 +803,29 @@ public class HFileReaderV2 extends AbstractHFileReader { this.nextIndexedKey = null; } - private final void readKeyValueLen() { + protected void readKeyValueLen() { blockBuffer.mark(); currKeyLen = blockBuffer.getInt(); currValueLen = blockBuffer.getInt(); + ByteBufferUtils.skip(blockBuffer, currKeyLen + currValueLen); + readMvccVersion(); + if (currKeyLen < 0 || currValueLen < 0 + || currKeyLen > blockBuffer.limit() + || currValueLen > blockBuffer.limit()) { + throw new IllegalStateException("Invalid currKeyLen " + currKeyLen + + " or currValueLen " + currValueLen + ". 
Block offset: " + + block.getOffset() + ", block length: " + blockBuffer.limit() + + ", position: " + blockBuffer.position() + " (without header)."); + } blockBuffer.reset(); + } + + protected void readMvccVersion() { if (this.reader.shouldIncludeMemstoreTS()) { if (this.reader.decodeMemstoreTS) { try { - int memstoreTSOffset = blockBuffer.arrayOffset() - + blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen - + currValueLen; - currMemstoreTS = Bytes.readVLong(blockBuffer.array(), - memstoreTSOffset); + currMemstoreTS = Bytes.readVLong(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position()); currMemstoreTSLen = WritableUtils.getVIntSize(currMemstoreTS); } catch (Exception e) { throw new RuntimeException("Error reading memstore timestamp", e); @@ -795,15 +835,6 @@ public class HFileReaderV2 extends AbstractHFileReader { currMemstoreTSLen = 1; } } - - if (currKeyLen < 0 || currValueLen < 0 - || currKeyLen > blockBuffer.limit() - || currValueLen > blockBuffer.limit()) { - throw new IllegalStateException("Invalid currKeyLen " + currKeyLen - + " or currValueLen " + currValueLen + ". Block offset: " - + block.getOffset() + ", block length: " + blockBuffer.limit() - + ", position: " + blockBuffer.position() + " (without header)."); - } } /** @@ -821,7 +852,7 @@ public class HFileReaderV2 extends AbstractHFileReader { * -2 in case of an inexact match and furthermore, the input key less * than the first key of current block(e.g. using a faked index key) */ - private int blockSeek(byte[] key, int offset, int length, + protected int blockSeek(byte[] key, int offset, int length, boolean seekBefore) { int klen, vlen; long memstoreTS = 0; @@ -931,34 +962,34 @@ public class HFileReaderV2 extends AbstractHFileReader { */ protected static class EncodedScannerV2 extends AbstractScannerV2 { private DataBlockEncoder.EncodedSeeker seeker = null; - private DataBlockEncoder dataBlockEncoder = null; - private final boolean includesMemstoreTS; - + protected DataBlockEncoder dataBlockEncoder = null; + protected final HFileContext meta; + protected HFileBlockDecodingContext decodingCtx; public EncodedScannerV2(HFileReaderV2 reader, boolean cacheBlocks, - boolean pread, boolean isCompaction, boolean includesMemstoreTS) { + boolean pread, boolean isCompaction, HFileContext meta) { super(reader, cacheBlocks, pread, isCompaction); - this.includesMemstoreTS = includesMemstoreTS; + this.meta = meta; } - private void setDataBlockEncoder(DataBlockEncoder dataBlockEncoder) { + protected void setDataBlockEncoder(DataBlockEncoder dataBlockEncoder) { this.dataBlockEncoder = dataBlockEncoder; - seeker = dataBlockEncoder.createSeeker(reader.getComparator(), - includesMemstoreTS); + decodingCtx = this.dataBlockEncoder.newDataBlockDecodingContext( + this.meta); + seeker = dataBlockEncoder.createSeeker(reader.getComparator(), decodingCtx); } - /** * Updates the current block to be the given {@link HFileBlock}. Seeks to * the the first key/value pair. 
* * @param newBlock the block to make current */ - private void updateCurrentBlock(HFileBlock newBlock) { + protected void updateCurrentBlock(HFileBlock newBlock) { block = newBlock; // sanity checks if (block.getBlockType() != BlockType.ENCODED_DATA) { throw new IllegalStateException( - "EncodedScannerV2 works only on encoded data blocks"); + "EncodedScanner works only on encoded data blocks"); } short dataBlockEncoderId = block.getDataBlockEncodingId(); @@ -1131,4 +1162,9 @@ public class HFileReaderV2 extends AbstractHFileReader { throw new RuntimeException(msg); } } + + @Override + public int getMajorVersion() { + return 2; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java new file mode 100644 index 00000000000..cec92958516 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java @@ -0,0 +1,276 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.fs.HFileSystem; +import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.util.ByteBufferUtils; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.WritableUtils; + +/** + * {@link HFile} reader for version 3. + * This Reader is aware of Tags. + */ +@InterfaceAudience.Private +public class HFileReaderV3 extends HFileReaderV2 { + + public static final int MAX_MINOR_VERSION = 0; + /** + * Opens a HFile. You must load the index before you can use it by calling + * {@link #loadFileInfo()}. + * @param path + * Path to HFile. + * @param trailer + * File trailer. + * @param fsdis + * input stream. + * @param size + * Length of the stream. + * @param cacheConf + * Cache configuration. + * @param preferredEncodingInCache + * the encoding to use in cache in case we have a choice. If the file + * is already encoded on disk, we will still use its on-disk encoding + * in cache. 
+ */ + public HFileReaderV3(Path path, FixedFileTrailer trailer, final FSDataInputStreamWrapper fsdis, + final long size, final CacheConfig cacheConf, DataBlockEncoding preferredEncodingInCache, + final HFileSystem hfs) throws IOException { + super(path, trailer, fsdis, size, cacheConf, preferredEncodingInCache, hfs); + + } + + @Override + protected HFileContext createHFileContext(FixedFileTrailer trailer) { + HFileContext meta = new HFileContext(); + meta.setIncludesMvcc(this.includesMemstoreTS); + meta.setUsesHBaseChecksum(true); + meta.setCompressAlgo(this.compressAlgo); + meta.setIncludesTags(true); + return meta; + } + + /** + * Create a Scanner on this file. No seeks or reads are done on creation. Call + * {@link HFileScanner#seekTo(byte[])} to position an start the read. There is + * nothing to clean up in a Scanner. Letting go of your references to the + * scanner is sufficient. + * @param cacheBlocks + * True if we should cache blocks read in by this scanner. + * @param pread + * Use positional read rather than seek+read if true (pread is better + * for random reads, seek+read is better scanning). + * @param isCompaction + * is scanner being used for a compaction? + * @return Scanner on this file. + */ + @Override + public HFileScanner getScanner(boolean cacheBlocks, final boolean pread, + final boolean isCompaction) { + // check if we want to use data block encoding in memory + if (dataBlockEncoder.useEncodedScanner(isCompaction)) { + return new EncodedScannerV3(this, cacheBlocks, pread, isCompaction, this.hfileContext); + } + return new ScannerV3(this, cacheBlocks, pread, isCompaction); + } + + /** + * Implementation of {@link HFileScanner} interface. + */ + protected static class ScannerV3 extends ScannerV2 { + + private HFileReaderV3 reader; + private int currTagsLen; + + public ScannerV3(HFileReaderV3 r, boolean cacheBlocks, final boolean pread, + final boolean isCompaction) { + super(r, cacheBlocks, pread, isCompaction); + this.reader = r; + } + + @Override + protected int getKvBufSize() { + int kvBufSize = super.getKvBufSize(); + if (reader.hfileContext.shouldIncludeTags()) { + kvBufSize += Bytes.SIZEOF_SHORT + currTagsLen; + } + return kvBufSize; + } + + protected void setNonSeekedState() { + super.setNonSeekedState(); + currTagsLen = 0; + } + + @Override + protected int getNextKVStartPosition() { + int nextKvPos = super.getNextKVStartPosition(); + if (reader.hfileContext.shouldIncludeTags()) { + nextKvPos += Bytes.SIZEOF_SHORT + currTagsLen; + } + return nextKvPos; + } + + protected void readKeyValueLen() { + blockBuffer.mark(); + currKeyLen = blockBuffer.getInt(); + currValueLen = blockBuffer.getInt(); + ByteBufferUtils.skip(blockBuffer, currKeyLen + currValueLen); + if (reader.hfileContext.shouldIncludeTags()) { + currTagsLen = blockBuffer.getShort(); + ByteBufferUtils.skip(blockBuffer, currTagsLen); + } + readMvccVersion(); + if (currKeyLen < 0 || currValueLen < 0 || currTagsLen < 0 || currKeyLen > blockBuffer.limit() + || currValueLen > blockBuffer.limit() || currTagsLen > blockBuffer.limit()) { + throw new IllegalStateException("Invalid currKeyLen " + currKeyLen + " or currValueLen " + + currValueLen + " or currTagLen " + currTagsLen + ". Block offset: " + + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " + + blockBuffer.position() + " (without header)."); + } + blockBuffer.reset(); + } + + /** + * Within a loaded block, seek looking for the last key that is smaller than + * (or equal to?) the key we are interested in. 
+ * A note on the seekBefore: if you have seekBefore = true, AND the first + * key in the block = key, then you'll get thrown exceptions. The caller has + * to check for that case and load the previous block as appropriate. + * @param key + * the key to find + * @param seekBefore + * find the key before the given key in case of exact match. + * @param offset + * Offset to find the key in the given bytebuffer + * @param length + * Length of the key to be found + * @return 0 in case of an exact key match, 1 in case of an inexact match, + * -2 in case of an inexact match and furthermore, the input key + * less than the first key of current block(e.g. using a faked index + * key) + */ + protected int blockSeek(byte[] key, int offset, int length, boolean seekBefore) { + int klen, vlen, tlen = 0; + long memstoreTS = 0; + int memstoreTSLen = 0; + int lastKeyValueSize = -1; + do { + blockBuffer.mark(); + klen = blockBuffer.getInt(); + vlen = blockBuffer.getInt(); + ByteBufferUtils.skip(blockBuffer, klen + vlen); + if (reader.hfileContext.shouldIncludeTags()) { + tlen = blockBuffer.getShort(); + ByteBufferUtils.skip(blockBuffer, tlen); + } + if (this.reader.shouldIncludeMemstoreTS()) { + if (this.reader.decodeMemstoreTS) { + try { + memstoreTS = Bytes.readVLong(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position()); + memstoreTSLen = WritableUtils.getVIntSize(memstoreTS); + } catch (Exception e) { + throw new RuntimeException("Error reading memstore timestamp", e); + } + } else { + memstoreTS = 0; + memstoreTSLen = 1; + } + } + blockBuffer.reset(); + int keyOffset = blockBuffer.arrayOffset() + blockBuffer.position() + (Bytes.SIZEOF_INT * 2); + int comp = reader.getComparator().compare(key, offset, length, blockBuffer.array(), + keyOffset, klen); + + if (comp == 0) { + if (seekBefore) { + if (lastKeyValueSize < 0) { + throw new IllegalStateException("blockSeek with seekBefore " + + "at the first key of the block: key=" + Bytes.toStringBinary(key) + + ", blockOffset=" + block.getOffset() + ", onDiskSize=" + + block.getOnDiskSizeWithHeader()); + } + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + return 1; // non exact match. + } + currKeyLen = klen; + currValueLen = vlen; + currTagsLen = tlen; + if (this.reader.shouldIncludeMemstoreTS()) { + currMemstoreTS = memstoreTS; + currMemstoreTSLen = memstoreTSLen; + } + return 0; // indicate exact match + } else if (comp < 0) { + if (lastKeyValueSize > 0) + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + if (lastKeyValueSize == -1 && blockBuffer.position() == 0) { + return HConstants.INDEX_KEY_MAGIC; + } + return 1; + } + + // The size of this key/value tuple, including key/value length fields. + lastKeyValueSize = klen + vlen + memstoreTSLen + KEY_VALUE_LEN_SIZE; + // include tag length also if tags included with KV + if (reader.hfileContext.shouldIncludeTags()) { + lastKeyValueSize += tlen + Bytes.SIZEOF_SHORT; + } + blockBuffer.position(blockBuffer.position() + lastKeyValueSize); + } while (blockBuffer.remaining() > 0); + + // Seek to the last key we successfully read. This will happen if this is + // the last key/value pair in the file, in which case the following call + // to next() has to return false. + blockBuffer.position(blockBuffer.position() - lastKeyValueSize); + readKeyValueLen(); + return 1; // didn't exactly find it. + } + + } + + /** + * ScannerV3 that operates on encoded data blocks. 
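Condensed into one helper, the per-cell layout that ScannerV3 walks above is: key length (int), value length (int), key bytes, value bytes, an optional short-prefixed tags block, and an optional MVCC vlong. A stand-alone sketch of that walk, assuming a heap-backed buffer positioned at the start of a cell (not code from the patch):

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.WritableUtils;

public class V3CellLayoutSketch {
  /** Skips one cell and returns the number of bytes it occupies in the block body. */
  static int skipOneCell(ByteBuffer block, boolean includesTags, boolean includesMvcc)
      throws IOException {
    int start = block.position();
    int keyLen = block.getInt();
    int valueLen = block.getInt();
    block.position(block.position() + keyLen + valueLen);
    if (includesTags) {
      int tagsLen = block.getShort();        // tags block is prefixed with a short length
      block.position(block.position() + tagsLen);
    }
    if (includesMvcc) {
      // The memstore timestamp is a vlong appended after the cell body.
      long mvcc = Bytes.readVLong(block.array(), block.arrayOffset() + block.position());
      block.position(block.position() + WritableUtils.getVIntSize(mvcc));
    }
    return block.position() - start;
  }
}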
+ */ + protected static class EncodedScannerV3 extends EncodedScannerV2 { + public EncodedScannerV3(HFileReaderV3 reader, boolean cacheBlocks, boolean pread, + boolean isCompaction, HFileContext meta) { + super(reader, cacheBlocks, pread, isCompaction, meta); + } + } + + @Override + public int getMajorVersion() { + return 3; + } + + @Override + protected HFileBlock diskToCacheFormat(HFileBlock hfileBlock, final boolean isCompaction) { + return dataBlockEncoder.diskToCacheFormat(hfileBlock, isCompaction); + } +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java index db6dc9c6302..5c37ca5ee1b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV2.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable; import org.apache.hadoop.hbase.util.ChecksumType; @@ -66,7 +67,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { new ArrayList(); /** Unified version 2 block writer */ - private HFileBlock.Writer fsBlockWriter; + protected HFileBlock.Writer fsBlockWriter; private HFileBlockIndex.BlockIndexWriter dataBlockIndexWriter; private HFileBlockIndex.BlockIndexWriter metaBlockIndexWriter; @@ -75,7 +76,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { private long firstDataBlockOffset = -1; /** The offset of the last data block or 0 if the file is empty. */ - private long lastDataBlockOffset; + protected long lastDataBlockOffset; /** The last(stop) Key of the previous data block. */ private byte[] lastKeyOfPreviousBlock = null; @@ -84,12 +85,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { private List additionalLoadOnOpenData = new ArrayList(); - /** Checksum related settings */ - private ChecksumType checksumType = HFile.DEFAULT_CHECKSUM_TYPE; - private int bytesPerChecksum = HFile.DEFAULT_BYTES_PER_CHECKSUM; - - private final boolean includeMemstoreTS; - private long maxMemstoreTS = 0; + protected long maxMemstoreTS = 0; static class WriterFactoryV2 extends HFile.WriterFactory { WriterFactoryV2(Configuration conf, CacheConfig cacheConf) { @@ -97,39 +93,30 @@ public class HFileWriterV2 extends AbstractHFileWriter { } @Override - public Writer createWriter(FileSystem fs, Path path, - FSDataOutputStream ostream, int blockSize, - Compression.Algorithm compress, HFileDataBlockEncoder blockEncoder, - final KVComparator comparator, final ChecksumType checksumType, - final int bytesPerChecksum, boolean includeMVCCReadpoint) throws IOException { - return new HFileWriterV2(conf, cacheConf, fs, path, ostream, blockSize, compress, - blockEncoder, comparator, checksumType, bytesPerChecksum, includeMVCCReadpoint); + public Writer createWriter(FileSystem fs, Path path, + FSDataOutputStream ostream, + KVComparator comparator, HFileContext context) throws IOException { + return new HFileWriterV2(conf, cacheConf, fs, path, ostream, + comparator, context); + } } - } /** Constructor that takes a path, creates and closes the output stream. 
*/ public HFileWriterV2(Configuration conf, CacheConfig cacheConf, - FileSystem fs, Path path, FSDataOutputStream ostream, int blockSize, - Compression.Algorithm compressAlgo, HFileDataBlockEncoder blockEncoder, - final KVComparator comparator, final ChecksumType checksumType, - final int bytesPerChecksum, final boolean includeMVCCReadpoint) throws IOException { + FileSystem fs, Path path, FSDataOutputStream ostream, + final KVComparator comparator, final HFileContext context) throws IOException { super(cacheConf, ostream == null ? createOutputStream(conf, fs, path, null) : ostream, - path, blockSize, compressAlgo, blockEncoder, comparator); - this.checksumType = checksumType; - this.bytesPerChecksum = bytesPerChecksum; - this.includeMemstoreTS = includeMVCCReadpoint; + path, comparator, context); finishInit(conf); } /** Additional initialization steps */ - private void finishInit(final Configuration conf) { + protected void finishInit(final Configuration conf) { if (fsBlockWriter != null) throw new IllegalStateException("finishInit called twice"); - // HFile filesystem-level (non-caching) block writer - fsBlockWriter = new HFileBlock.Writer(compressAlgo, blockEncoder, - includeMemstoreTS, checksumType, bytesPerChecksum); + fsBlockWriter = createBlockWriter(); // Data block index writer boolean cacheIndexesOnWrite = cacheConf.shouldCacheIndexesOnWrite(); @@ -145,13 +132,21 @@ public class HFileWriterV2 extends AbstractHFileWriter { if (LOG.isTraceEnabled()) LOG.trace("Initialized with " + cacheConf); } + protected HFileBlock.Writer createBlockWriter() { + // HFile filesystem-level (non-caching) block writer + hFileContext.setIncludesTags(false); + // This can be set while the write is created itself because + // in both cases useHBaseChecksum is going to be true + hFileContext.setUsesHBaseChecksum(true); + return new HFileBlock.Writer(blockEncoder, hFileContext); + } /** * At a block boundary, write all the inline blocks and opens new block. * * @throws IOException */ - private void checkBlockBoundary() throws IOException { - if (fsBlockWriter.blockSizeWritten() < blockSize) + protected void checkBlockBoundary() throws IOException { + if (fsBlockWriter.blockSizeWritten() < hFileContext.getBlocksize()) return; finishBlock(); @@ -224,7 +219,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { * * @throws IOException */ - private void newBlock() throws IOException { + protected void newBlock() throws IOException { // This is where the next block begins. 
fsBlockWriter.startWriting(BlockType.DATA); firstKeyInBlock = null; @@ -303,8 +298,8 @@ public class HFileWriterV2 extends AbstractHFileWriter { * @param vlength * @throws IOException */ - private void append(final long memstoreTS, final byte[] key, final int koffset, final int klength, - final byte[] value, final int voffset, final int vlength) + protected void append(final long memstoreTS, final byte[] key, final int koffset, + final int klength, final byte[] value, final int voffset, final int vlength) throws IOException { boolean dupKey = checkKey(key, koffset, klength); checkValue(value, voffset, vlength); @@ -325,7 +320,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { totalValueLength += vlength; out.write(key, koffset, klength); out.write(value, voffset, vlength); - if (this.includeMemstoreTS) { + if (this.hFileContext.shouldIncludeMvcc()) { WritableUtils.writeVLong(out, memstoreTS); } } @@ -356,8 +351,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { finishBlock(); writeInlineBlocks(true); - FixedFileTrailer trailer = new FixedFileTrailer(2, - HFileReaderV2.MAX_MINOR_VERSION); + FixedFileTrailer trailer = new FixedFileTrailer(getMajorVersion(), getMinorVersion()); // Write out the metadata blocks if any. if (!metaNames.isEmpty()) { @@ -395,7 +389,7 @@ public class HFileWriterV2 extends AbstractHFileWriter { fsBlockWriter.writeHeaderAndData(outputStream); totalUncompressedBytes += fsBlockWriter.getUncompressedSizeWithHeader(); - if (this.includeMemstoreTS) { + if (this.hFileContext.shouldIncludeMvcc()) { appendFileInfo(MAX_MEMSTORE_TS_KEY, Bytes.toBytes(maxMemstoreTS)); appendFileInfo(KEY_VALUE_VERSION, Bytes.toBytes(KEY_VALUE_VER_WITH_MEMSTORE)); } @@ -466,4 +460,17 @@ public class HFileWriterV2 extends AbstractHFileWriter { } }); } + + @Override + public void append(byte[] key, byte[] value, byte[] tag) throws IOException { + throw new UnsupportedOperationException("KV tags are supported only from HFile V3"); + } + + protected int getMajorVersion() { + return 2; + } + + protected int getMinorVersion() { + return HFileReaderV2.MAX_MINOR_VERSION; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java new file mode 100644 index 00000000000..101abeb2084 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterV3.java @@ -0,0 +1,202 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.io.hfile; + +import java.io.DataOutputStream; +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.KVComparator; +import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; +import org.apache.hadoop.hbase.io.hfile.HFile.Writer; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.io.WritableUtils; + +/** + * This is an extension of HFileWriterV2 that is tags aware. + */ +@InterfaceAudience.Private +public class HFileWriterV3 extends HFileWriterV2 { + + // TODO : Use this to track maxtaglength + private int maxTagsLength = 0; + + static class WriterFactoryV3 extends HFile.WriterFactory { + WriterFactoryV3(Configuration conf, CacheConfig cacheConf) { + super(conf, cacheConf); + } + + @Override + public Writer createWriter(FileSystem fs, Path path, FSDataOutputStream ostream, + final KVComparator comparator, HFileContext fileContext) + throws IOException { + return new HFileWriterV3(conf, cacheConf, fs, path, ostream, comparator, fileContext); + } + } + + /** Constructor that takes a path, creates and closes the output stream. */ + public HFileWriterV3(Configuration conf, CacheConfig cacheConf, FileSystem fs, Path path, + FSDataOutputStream ostream, final KVComparator comparator, + final HFileContext fileContext) throws IOException { + super(conf, cacheConf, fs, path, ostream, comparator, fileContext); + } + + /** + * Add key/value to file. Keys must be added in an order that agrees with the + * Comparator passed on construction. + * + * @param kv + * KeyValue to add. Cannot be empty nor null. + * @throws IOException + */ + @Override + public void append(final KeyValue kv) throws IOException { + // Currently get the complete arrays + append(kv.getMvccVersion(), kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength(), + kv.getBuffer(), kv.getValueOffset(), kv.getValueLength(), kv.getBuffer(), + kv.getTagsOffset(), kv.getTagsLength()); + this.maxMemstoreTS = Math.max(this.maxMemstoreTS, kv.getMvccVersion()); + } + + /** + * Add key/value to file. Keys must be added in an order that agrees with the + * Comparator passed on construction. + * @param key + * Key to add. Cannot be empty nor null. + * @param value + * Value to add. Cannot be empty nor null. + * @throws IOException + */ + @Override + public void append(final byte[] key, final byte[] value) throws IOException { + append(key, value, HConstants.EMPTY_BYTE_ARRAY); + } + + /** + * Add key/value to file. Keys must be added in an order that agrees with the + * Comparator passed on construction. + * @param key + * Key to add. Cannot be empty nor null. + * @param value + * Value to add. Cannot be empty nor null. + * @param tag + * Tag t add. Cannot be empty or null. + * @throws IOException + */ + @Override + public void append(final byte[] key, final byte[] value, byte[] tag) throws IOException { + append(0, key, 0, key.length, value, 0, value.length, tag, 0, tag.length); + } + + /** + * Add key/value to file. Keys must be added in an order that agrees with the + * Comparator passed on construction. 
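A hedged end-to-end usage sketch for the V3 writer follows. The file path, comparator choice, default HFileContext settings and the tag-free KeyValue are illustrative only; a KeyValue that carries tags goes through the same append(KeyValue) call, with its tag block written behind a short length prefix in the private append that follows.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileWriterV3;
import org.apache.hadoop.hbase.util.Bytes;

public class HFileV3WriteSketch {
  static void writeOneCell() throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    HFileContext ctx = new HFileContext();     // createBlockWriter() turns tags and checksums on
    HFileWriterV3 writer = new HFileWriterV3(conf, new CacheConfig(conf), fs,
        new Path("/tmp/example.hfile"), null /* output stream created from the path */,
        KeyValue.COMPARATOR, ctx);
    try {
      KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("f"),
          Bytes.toBytes("q"), System.currentTimeMillis(), Bytes.toBytes("value"));
      writer.append(kv);                       // a tag-carrying KeyValue follows the same path
    } finally {
      writer.close();
    }
  }
}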
+ * @param key + * @param koffset + * @param klength + * @param value + * @param voffset + * @param vlength + * @param tag + * @param tagsOffset + * @param tagLength + * @throws IOException + */ + private void append(final long memstoreTS, final byte[] key, final int koffset, + final int klength, final byte[] value, final int voffset, final int vlength, + final byte[] tag, final int tagsOffset, final int tagsLength) throws IOException { + boolean dupKey = checkKey(key, koffset, klength); + checkValue(value, voffset, vlength); + if (!dupKey) { + checkBlockBoundary(); + } + + if (!fsBlockWriter.isWriting()) + newBlock(); + + // Write length of key and value and then actual key and value bytes. + // Additionally, we may also write down the memstoreTS. + { + DataOutputStream out = fsBlockWriter.getUserDataStream(); + out.writeInt(klength); + totalKeyLength += klength; + out.writeInt(vlength); + totalValueLength += vlength; + out.write(key, koffset, klength); + out.write(value, voffset, vlength); + // Write the additional tag into the stream + if (hFileContext.shouldIncludeTags()) { + out.writeShort((short) tagsLength); + if (tagsLength > 0) { + out.write(tag, tagsOffset, tagsLength); + if (tagsLength > maxTagsLength) { + maxTagsLength = tagsLength; + } + } + } + if (this.hFileContext.shouldIncludeMvcc()) { + WritableUtils.writeVLong(out, memstoreTS); + } + } + + // Are we the first key in this block? + if (firstKeyInBlock == null) { + // Copy the key. + firstKeyInBlock = new byte[klength]; + System.arraycopy(key, koffset, firstKeyInBlock, 0, klength); + } + + lastKeyBuffer = key; + lastKeyOffset = koffset; + lastKeyLength = klength; + entryCount++; + } + + protected void finishFileInfo() throws IOException { + super.finishFileInfo(); + if (hFileContext.shouldIncludeTags()) { + // When tags are not being written in this file, MAX_TAGS_LEN is excluded + // from the FileInfo + fileInfo.append(FileInfo.MAX_TAGS_LEN, Bytes.toBytes(this.maxTagsLength), false); + } + } + + @Override + protected HFileBlock.Writer createBlockWriter() { + // HFile filesystem-level (non-caching) block writer + hFileContext.setIncludesTags(true); + hFileContext.setUsesHBaseChecksum(true); + return new HFileBlock.Writer(blockEncoder, hFileContext); + } + + @Override + protected int getMajorVersion() { + return 3; + } + + @Override + protected int getMinorVersion() { + return HFileReaderV3.MAX_MINOR_VERSION; + } +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java index 22a9b0ff47a..aca80377021 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/NoOpDataBlockEncoder.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; +import org.apache.hadoop.hbase.io.hfile.HFileContext; /** * Does not perform any kind of encoding/decoding. 
@@ -50,7 +51,6 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder { @Override public void beforeWriteToDisk(ByteBuffer in, - boolean includesMemstoreTS, HFileBlockEncodingContext encodeCtx, BlockType blockType) throws IOException { if (!(encodeCtx.getClass().getName().equals( @@ -95,15 +95,13 @@ public class NoOpDataBlockEncoder implements HFileDataBlockEncoder { @Override public HFileBlockEncodingContext newOnDiskDataBlockEncodingContext( - Algorithm compressionAlgorithm, byte[] dummyHeader) { - return new HFileBlockDefaultEncodingContext(compressionAlgorithm, - null, dummyHeader); + byte[] dummyHeader, HFileContext meta) { + return new HFileBlockDefaultEncodingContext(null, dummyHeader, meta); } @Override - public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext( - Algorithm compressionAlgorithm) { - return new HFileBlockDefaultDecodingContext(compressionAlgorithm); + public HFileBlockDecodingContext newOnDiskDataBlockDecodingContext(HFileContext meta) { + return new HFileBlockDefaultDecodingContext(meta); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java index 1f7b4a7ab1e..9cb9d03e818 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/HFileOutputFormat.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl; import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; @@ -106,20 +107,7 @@ public class HFileOutputFormat extends FileOutputFormat bloomTypeMap = createFamilyBloomMap(conf); final Map blockSizeMap = createFamilyBlockSizeMap(conf); - String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY); - final HFileDataBlockEncoder encoder; - if (dataBlockEncodingStr == null) { - encoder = NoOpDataBlockEncoder.INSTANCE; - } else { - try { - encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding - .valueOf(dataBlockEncodingStr)); - } catch (IllegalArgumentException ex) { - throw new RuntimeException( - "Invalid data block encoding type configured for the param " - + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr); - } - } + final String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY); return new RecordWriter() { // Map of families to writers and how much has been output on the writer. 
@@ -206,14 +194,18 @@ public class HFileOutputFormat extends FileOutputFormat= smallestReadPoint); + fd.maxMVCCReadpoint >= smallestReadPoint, fd.maxTagsLength > 0); boolean finished = performCompaction(scanner, writer, smallestReadPoint); if (!finished) { abortWriter(writer); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java index c2138a9c2a3..0aa9b2f827b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java @@ -290,6 +290,9 @@ public class HLogPrettyPrinter { + op.get("qualifier")); out.println(" timestamp: " + (new Date((Long) op.get("timestamp")))); + if(op.get("tag") != null) { + out.println(" tag: " + op.get("tag")); + } if (outputValues) out.println(" value: " + op.get("value")); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/KeyValueCompression.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/KeyValueCompression.java index d14610aec86..0b22ed34424 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/KeyValueCompression.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/KeyValueCompression.java @@ -50,7 +50,8 @@ class KeyValueCompression { throws IOException { int keylength = WritableUtils.readVInt(in); int vlength = WritableUtils.readVInt(in); - int length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength; + int tagsLength = WritableUtils.readVInt(in); + int length = (int) KeyValue.getKeyValueDataStructureSize(keylength, vlength, tagsLength); byte[] backingArray = new byte[length]; int pos = 0; @@ -79,7 +80,7 @@ class KeyValueCompression { // the rest in.readFully(backingArray, pos, length - pos); - return new KeyValue(backingArray); + return new KeyValue(backingArray, 0, length); } private static void checkLength(int len, int max) throws IOException { @@ -105,6 +106,7 @@ class KeyValueCompression { // we first write the KeyValue infrastructure as VInts. WritableUtils.writeVInt(out, keyVal.getKeyLength()); WritableUtils.writeVInt(out, keyVal.getValueLength()); + WritableUtils.writeVInt(out, keyVal.getTagsLength()); // now we write the row key, as the row key is likely to be repeated // We save space only if we attempt to compress elements with duplicates diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALCellCodec.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALCellCodec.java index 187460db66e..4f73f3238e3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALCellCodec.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALCellCodec.java @@ -156,8 +156,8 @@ public class WALCellCodec implements Codec { // We first write the KeyValue infrastructure as VInts. StreamUtils.writeRawVInt32(out, kv.getKeyLength()); StreamUtils.writeRawVInt32(out, kv.getValueLength()); - // To support tags. This will be replaced with kv.getTagsLength - StreamUtils.writeRawVInt32(out, (short)0); + // To support tags + StreamUtils.writeRawVInt32(out, kv.getTagsLength()); // Write row, qualifier, and family; use dictionary // compression as they're likely to have duplicates. 
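With the tags-length vint now part of the serialized WAL KeyValue, the read side sizes its backing array as sketched below; this mirrors the WALCellCodec branch that follows, and KeyValueCompression arrives at the same number through KeyValue.getKeyValueDataStructureSize. A small stand-alone helper, not patch code:

import org.apache.hadoop.hbase.KeyValue;

public class WalKvLengthSketch {
  /** Length of the byte[] needed to rebuild one KeyValue read back from the WAL stream. */
  static int backingArrayLength(int keyLen, int valueLen, int tagsLen) {
    if (tagsLen == 0) {
      return KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keyLen + valueLen;
    }
    return KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + keyLen + valueLen + tagsLen;
  }
}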
@@ -199,10 +199,13 @@ public class WALCellCodec implements Codec { int keylength = StreamUtils.readRawVarint32(in); int vlength = StreamUtils.readRawVarint32(in); - // To support Tags..Tags length will be 0. - // For now ignore the read value. This will be the tagslength - StreamUtils.readRawVarint32(in); - int length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength; + int tagsLength = StreamUtils.readRawVarint32(in); + int length = 0; + if(tagsLength == 0) { + length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength; + } else { + length = KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + keylength + vlength + tagsLength; + } byte[] backingArray = new byte[length]; int pos = 0; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumFactory.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumFactory.java index 4fc09e103a6..e69de29bb2d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumFactory.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumFactory.java @@ -1,97 +0,0 @@ -/** - * Copyright The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.util; - -import java.io.IOException; -import java.lang.ClassNotFoundException; -import java.util.zip.Checksum; -import java.lang.reflect.Constructor; - -/** - * Utility class that is used to generate a Checksum object. - * The Checksum implementation is pluggable and an application - * can specify their own class that implements their own - * Checksum algorithm. - */ -public class ChecksumFactory { - - static private final Class[] EMPTY_ARRAY = new Class[]{}; - - /** - * Create a new instance of a Checksum object. - * @return The newly created Checksum object - */ - static public Checksum newInstance(String className) throws IOException { - try { - Class clazz = getClassByName(className); - return (Checksum)newInstance(clazz); - } catch (ClassNotFoundException e) { - throw new IOException(e); - } - } - - /** - * Returns a Constructor that can be used to create a Checksum object. 
- * @param className classname for which an constructor is created - * @return a new Constructor object - */ - static public Constructor newConstructor(String className) - throws IOException { - try { - Class clazz = getClassByName(className); - Constructor ctor = clazz.getDeclaredConstructor(EMPTY_ARRAY); - ctor.setAccessible(true); - return ctor; - } catch (ClassNotFoundException e) { - throw new IOException(e); - } catch (java.lang.NoSuchMethodException e) { - throw new IOException(e); - } - } - - /** Create an object for the given class and initialize it from conf - * - * @param theClass class of which an object is created - * @return a new object - */ - static private T newInstance(Class theClass) { - T result; - try { - Constructor ctor = theClass.getDeclaredConstructor(EMPTY_ARRAY); - ctor.setAccessible(true); - result = ctor.newInstance(); - } catch (Exception e) { - throw new RuntimeException(e); - } - return result; - } - - /** - * Load a class by name. - * @param name the class name. - * @return the class object. - * @throws ClassNotFoundException if the class is not found. - */ - static private Class getClassByName(String name) - throws ClassNotFoundException { - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - return Class.forName(name, true, classLoader); - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java index 63d3a2e0dfc..e69de29bb2d 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ChecksumType.java @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hbase.util; - -import java.io.IOException; -import java.lang.reflect.Constructor; -import java.util.zip.Checksum; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -/** - * Checksum types. The Checksum type is a one byte number - * that stores a representation of the checksum algorithm - * used to encode a hfile. The ordinal of these cannot - * change or else you risk breaking all existing HFiles out there. 
- */ -public enum ChecksumType { - - NULL((byte)0) { - @Override - public String getName() { - return "NULL"; - } - @Override - public void initialize() { - // do nothing - } - @Override - public Checksum getChecksumObject() throws IOException { - return null; // checksums not used - } - }, - - CRC32((byte)1) { - private volatile Constructor ctor; - - @Override - public String getName() { - return "CRC32"; - } - - @Override - public void initialize() { - final String PURECRC32 = "org.apache.hadoop.util.PureJavaCrc32"; - final String JDKCRC = "java.util.zip.CRC32"; - LOG = LogFactory.getLog(ChecksumType.class); - - // check if hadoop library is available - try { - ctor = ChecksumFactory.newConstructor(PURECRC32); - LOG.info("Checksum using " + PURECRC32); - } catch (Exception e) { - LOG.trace(PURECRC32 + " not available."); - } - try { - // The default checksum class name is java.util.zip.CRC32. - // This is available on all JVMs. - if (ctor == null) { - ctor = ChecksumFactory.newConstructor(JDKCRC); - LOG.info("Checksum can use " + JDKCRC); - } - } catch (Exception e) { - LOG.trace(JDKCRC + " not available."); - } - } - - @Override - public Checksum getChecksumObject() throws IOException { - if (ctor == null) { - throw new IOException("Bad constructor for " + getName()); - } - try { - return (Checksum)ctor.newInstance(); - } catch (Exception e) { - throw new IOException(e); - } - } - }, - - CRC32C((byte)2) { - private transient Constructor ctor; - - @Override - public String getName() { - return "CRC32C"; - } - - @Override - public void initialize() { - final String PURECRC32C = "org.apache.hadoop.util.PureJavaCrc32C"; - LOG = LogFactory.getLog(ChecksumType.class); - try { - ctor = ChecksumFactory.newConstructor(PURECRC32C); - LOG.info("Checksum can use " + PURECRC32C); - } catch (Exception e) { - LOG.trace(PURECRC32C + " not available."); - } - } - - @Override - public Checksum getChecksumObject() throws IOException { - if (ctor == null) { - throw new IOException("Bad constructor for " + getName()); - } - try { - return (Checksum)ctor.newInstance(); - } catch (Exception e) { - throw new IOException(e); - } - } - }; - - private final byte code; - protected Log LOG; - - /** initializes the relevant checksum class object */ - abstract void initialize(); - - /** returns the name of this checksum type */ - public abstract String getName(); - - private ChecksumType(final byte c) { - this.code = c; - initialize(); - } - - /** returns a object that can be used to generate/validate checksums */ - public abstract Checksum getChecksumObject() throws IOException; - - public byte getCode() { - return this.code; - } - - /** - * Cannot rely on enum ordinals . They change if item is removed or moved. - * Do our own codes. - * @param b - * @return Type associated with passed code. - */ - public static ChecksumType codeToType(final byte b) { - for (ChecksumType t : ChecksumType.values()) { - if (t.getCode() == b) { - return t; - } - } - throw new RuntimeException("Unknown checksum type code " + b); - } - - /** - * Map a checksum name to a specific type. - * Do our own names. - * @param name - * @return Type associated with passed code. 
- */ - public static ChecksumType nameToType(final String name) { - for (ChecksumType t : ChecksumType.values()) { - if (t.getName().equals(name)) { - return t; - } - } - throw new RuntimeException("Unknown checksum type name " + name); - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompressionTest.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompressionTest.java index eff0a964b6b..e7360c03088 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompressionTest.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompressionTest.java @@ -34,6 +34,8 @@ import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.AbstractHFileWriter; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.io.compress.Compressor; /** @@ -112,9 +114,11 @@ public class CompressionTest { public static void doSmokeTest(FileSystem fs, Path path, String codec) throws Exception { Configuration conf = HBaseConfiguration.create(); + HFileContext context = new HFileContext(); + context.setCompressAlgo(AbstractHFileWriter.compressionByName(codec)); HFile.Writer writer = HFile.getWriterFactoryNoCache(conf) .withPath(fs, path) - .withCompression(codec) + .withFileContext(context) .create(); writer.append(Bytes.toBytes("testkey"), Bytes.toBytes("testval")); writer.appendFileInfo(Bytes.toBytes("infokey"), Bytes.toBytes("infoval")); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index e326cec3470..c6bc93f08dc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -178,13 +178,15 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { { Compression.Algorithm.GZ } }); /** This is for unit tests parameterized with a single boolean. */ public static final List BOOLEAN_PARAMETERIZED = Arrays.asList(new Object[][] { { new Boolean(false) }, { new Boolean(true) } }); - + + /** This is for unit tests parameterized with two booleans: memstoreTS and tags.
*/ + public static final List MEMSTORETS_TAGS_PARAMETRIZED = memStoreTSAndTagsCombination() ; /** Compression algorithms to use in testing */ public static final Compression.Algorithm[] COMPRESSION_ALGORITHMS ={ Compression.Algorithm.NONE, Compression.Algorithm.GZ @@ -205,6 +207,18 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { return Collections.unmodifiableList(configurations); } + /** + * Create combination of memstoreTS and tags + */ + private static List memStoreTSAndTagsCombination() { + List configurations = new ArrayList(); + configurations.add(new Object[] { false, false }); + configurations.add(new Object[] { false, true }); + configurations.add(new Object[] { true, false }); + configurations.add(new Object[] { true, true }); + return Collections.unmodifiableList(configurations); + } + public static final Collection BLOOM_AND_COMPRESSION_COMBINATIONS = bloomAndCompressionCombinations(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java index 6783cbf9614..93fc3bc6e63 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.util.Bytes; @@ -188,10 +189,12 @@ public class HFilePerformanceEvaluation { @Override void setUp() throws Exception { + HFileContext hFileContext = new HFileContext(); + hFileContext.setBlocksize(RFILE_BLOCKSIZE); writer = HFile.getWriterFactoryNoCache(conf) .withPath(fs, mf) - .withBlockSize(RFILE_BLOCKSIZE) + .withFileContext(hFileContext) .create(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java index 9212f415807..39eab70f554 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java @@ -22,26 +22,27 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.io.PrintStream; -import java.io.File; +import java.lang.reflect.Constructor; import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.Map; import java.util.Random; import java.util.TreeMap; -import java.util.Arrays; import java.util.regex.Matcher; import java.util.regex.Pattern; -import java.lang.reflect.Constructor; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HConnection; @@ -51,21 +52,19 @@ import 
org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; -import org.apache.hadoop.hbase.client.Durability; -import org.apache.hadoop.hbase.filter.PageFilter; -import org.apache.hadoop.hbase.filter.WhileMatchFilter; -import org.apache.hadoop.hbase.filter.Filter; -import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; -import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.BinaryComparator; -import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; +import org.apache.hadoop.hbase.filter.CompareFilter; +import org.apache.hadoop.hbase.filter.Filter; +import org.apache.hadoop.hbase.filter.PageFilter; +import org.apache.hadoop.hbase.filter.SingleColumnValueFilter; +import org.apache.hadoop.hbase.filter.WhileMatchFilter; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Hash; import org.apache.hadoop.hbase.util.MurmurHash; import org.apache.hadoop.hbase.util.Pair; -import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; @@ -79,9 +78,9 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer; +import org.apache.hadoop.util.LineReader; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.hadoop.util.LineReader; /** @@ -104,9 +103,11 @@ public class PerformanceEvaluation extends Configured implements Tool { protected static final Log LOG = LogFactory.getLog(PerformanceEvaluation.class.getName()); private static final int DEFAULT_ROW_PREFIX_LENGTH = 16; - private static final int VALUE_LENGTH = 1000; + public static final int VALUE_LENGTH = 1000; private static final int ONE_GB = 1024 * 1024 * 1000; private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH; + // TODO : should we make this configurable + private static final int TAG_LENGTH = 256; public static final byte[] COMPRESSION = Bytes.toBytes("NONE"); public static final TableName TABLE_NAME = @@ -129,6 +130,8 @@ public class PerformanceEvaluation extends Configured implements Tool { private boolean writeToWAL = true; private boolean inMemoryCF = false; private int presplitRegions = 0; + private boolean useTags = false; + private int noOfTags = 1; private HConnection connection; private static final Path PERF_EVAL_DIR = new Path("performance_evaluation"); @@ -217,6 +220,8 @@ public class PerformanceEvaluation extends Configured implements Tool { private int clients = 0; private boolean flushCommits = false; private boolean writeToWAL = true; + private boolean useTags = false; + private int noOfTags = 0; public PeInputSplit() { this.startRow = 0; @@ -225,16 +230,20 @@ public class PerformanceEvaluation extends Configured implements Tool { this.clients = 0; this.flushCommits = false; this.writeToWAL = true; + this.useTags = false; + this.noOfTags = 0; } public PeInputSplit(int startRow, int rows, int totalRows, int clients, - boolean flushCommits, boolean writeToWAL) { + boolean flushCommits, boolean writeToWAL, boolean useTags, int 
noOfTags) { this.startRow = startRow; this.rows = rows; this.totalRows = totalRows; this.clients = clients; this.flushCommits = flushCommits; this.writeToWAL = writeToWAL; + this.useTags = useTags; + this.noOfTags = noOfTags; } @Override @@ -245,6 +254,8 @@ public class PerformanceEvaluation extends Configured implements Tool { this.clients = in.readInt(); this.flushCommits = in.readBoolean(); this.writeToWAL = in.readBoolean(); + this.useTags = in.readBoolean(); + this.noOfTags = in.readInt(); } @Override @@ -255,6 +266,8 @@ public class PerformanceEvaluation extends Configured implements Tool { out.writeInt(clients); out.writeBoolean(flushCommits); out.writeBoolean(writeToWAL); + out.writeBoolean(useTags); + out.writeInt(noOfTags); } @Override @@ -290,6 +303,14 @@ public class PerformanceEvaluation extends Configured implements Tool { public boolean isWriteToWAL() { return writeToWAL; } + + public boolean isUseTags() { + return useTags; + } + + public int getNoOfTags() { + return noOfTags; + } } /** @@ -326,6 +347,8 @@ public class PerformanceEvaluation extends Configured implements Tool { int clients = Integer.parseInt(m.group(4)); boolean flushCommits = Boolean.parseBoolean(m.group(5)); boolean writeToWAL = Boolean.parseBoolean(m.group(6)); + boolean useTags = Boolean.parseBoolean(m.group(7)); + int noOfTags = Integer.parseInt(m.group(8)); LOG.debug("split["+ splitList.size() + "] " + " startRow=" + startRow + @@ -333,11 +356,13 @@ public class PerformanceEvaluation extends Configured implements Tool { " totalRows=" + totalRows + " clients=" + clients + " flushCommits=" + flushCommits + - " writeToWAL=" + writeToWAL); + " writeToWAL=" + writeToWAL + + " useTags=" + useTags + + " noOfTags=" +noOfTags); PeInputSplit newSplit = new PeInputSplit(startRow, rows, totalRows, clients, - flushCommits, writeToWAL); + flushCommits, writeToWAL, useTags, noOfTags); splitList.add(newSplit); } } @@ -457,9 +482,10 @@ public class PerformanceEvaluation extends Configured implements Tool { // Evaluation task long elapsedTime = this.pe.runOneClient(this.cmd, value.getStartRow(), - value.getRows(), value.getTotalRows(), - value.isFlushCommits(), value.isWriteToWAL(), - HConnectionManager.createConnection(context.getConfiguration()), status); + value.getRows(), value.getTotalRows(), + value.isFlushCommits(), value.isWriteToWAL(), + value.isUseTags(), value.getNoOfTags(), + HConnectionManager.createConnection(context.getConfiguration()), status); // Collect how much time the thing took. Report as map output and // to the ELAPSED_TIME counter. 
context.getCounter(Counter.ELAPSED_TIME).increment(elapsedTime); @@ -566,6 +592,8 @@ public class PerformanceEvaluation extends Configured implements Tool { final Compression.Algorithm compression = this.compression; final boolean writeToWal = this.writeToWAL; final int preSplitRegions = this.presplitRegions; + final boolean useTags = this.useTags; + final int numTags = this.noOfTags; final HConnection connection = HConnectionManager.createConnection(getConf()); for (int i = 0; i < this.N; i++) { final int index = i; @@ -582,14 +610,16 @@ public class PerformanceEvaluation extends Configured implements Tool { pe.presplitRegions = preSplitRegions; pe.N = N; pe.connection = connection; + pe.useTags = useTags; + pe.noOfTags = numTags; try { long elapsedTime = pe.runOneClient(cmd, index * perClientRows, - perClientRows, R, - flushCommits, writeToWAL, connection, new Status() { - public void setStatus(final String msg) throws IOException { - LOG.info("client-" + getName() + " " + msg); - } - }); + perClientRows, R, + flushCommits, writeToWAL, useTags, noOfTags, connection, new Status() { + public void setStatus(final String msg) throws IOException { + LOG.info("client-" + getName() + " " + msg); + } + }); timings[index] = elapsedTime; LOG.info("Finished " + getName() + " in " + elapsedTime + "ms writing " + perClientRows + " rows"); @@ -748,14 +778,16 @@ public class PerformanceEvaluation extends Configured implements Tool { private TableName tableName; private boolean flushCommits; private boolean writeToWAL = true; + private boolean useTags = false; + private int noOfTags = 0; private HConnection connection; TestOptions() { } - TestOptions(int startRow, int perClientRunRows, int totalRows, - int numClientThreads, TableName tableName, - boolean flushCommits, boolean writeToWAL, HConnection connection) { + TestOptions(int startRow, int perClientRunRows, int totalRows, int numClientThreads, + TableName tableName, boolean flushCommits, boolean writeToWAL, boolean useTags, + int noOfTags, HConnection connection) { this.startRow = startRow; this.perClientRunRows = perClientRunRows; this.totalRows = totalRows; @@ -763,6 +795,8 @@ public class PerformanceEvaluation extends Configured implements Tool { this.tableName = tableName; this.flushCommits = flushCommits; this.writeToWAL = writeToWAL; + this.useTags = useTags; + this.noOfTags = noOfTags; this.connection = connection; } @@ -797,6 +831,13 @@ public class PerformanceEvaluation extends Configured implements Tool { public HConnection getConnection() { return connection; } + + public boolean isUseTags() { + return this.useTags; + } + public int getNumTags() { + return this.noOfTags; + } } /* @@ -822,6 +863,8 @@ public class PerformanceEvaluation extends Configured implements Tool { protected volatile Configuration conf; protected boolean flushCommits; protected boolean writeToWAL; + protected boolean useTags; + protected int noOfTags; protected HConnection connection; /** @@ -839,6 +882,8 @@ public class PerformanceEvaluation extends Configured implements Tool { this.conf = conf; this.flushCommits = options.isFlushCommits(); this.writeToWAL = options.isWriteToWAL(); + this.useTags = options.isUseTags(); + this.noOfTags = options.getNumTags(); this.connection = options.getConnection(); } @@ -1041,10 +1086,20 @@ public class PerformanceEvaluation extends Configured implements Tool { @Override void testRow(final int i) throws IOException { - byte [] row = getRandomRow(this.rand, this.totalRows); + byte[] row = getRandomRow(this.rand, this.totalRows); 
Put put = new Put(row); - byte[] value = generateValue(this.rand); - put.add(FAMILY_NAME, QUALIFIER_NAME, value); + byte[] value = generateData(this.rand, VALUE_LENGTH); + if (useTags) { + byte[] tag = generateData(this.rand, TAG_LENGTH); + Tag[] tags = new Tag[noOfTags]; + for (int n = 0; n < noOfTags; n++) { + Tag t = new Tag((byte) n, tag); + tags[n] = t; + } + put.add(FAMILY_NAME, QUALIFIER_NAME, value, tags); + } else { + put.add(FAMILY_NAME, QUALIFIER_NAME, value); + } put.setDurability(writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL); table.put(put); } @@ -1102,8 +1157,18 @@ public class PerformanceEvaluation extends Configured implements Tool { @Override void testRow(final int i) throws IOException { Put put = new Put(format(i)); - byte[] value = generateValue(this.rand); - put.add(FAMILY_NAME, QUALIFIER_NAME, value); + byte[] value = generateData(this.rand, VALUE_LENGTH); + if (useTags) { + byte[] tag = generateData(this.rand, TAG_LENGTH); + Tag[] tags = new Tag[noOfTags]; + for (int n = 0; n < noOfTags; n++) { + Tag t = new Tag((byte) n, tag); + tags[n] = t; + } + put.add(FAMILY_NAME, QUALIFIER_NAME, value, tags); + } else { + put.add(FAMILY_NAME, QUALIFIER_NAME, value); + } put.setDurability(writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL); table.put(put); } @@ -1119,7 +1184,7 @@ public class PerformanceEvaluation extends Configured implements Tool { @Override void testRow(int i) throws IOException { - byte[] value = generateValue(this.rand); + byte[] value = generateData(this.rand, VALUE_LENGTH); Scan scan = constructScan(value); ResultScanner scanner = null; try { @@ -1165,11 +1230,11 @@ public class PerformanceEvaluation extends Configured implements Tool { * consumes about 30% of CPU time. * @return Generated random value to insert into a table cell. 
*/ - public static byte[] generateValue(final Random r) { - byte [] b = new byte [VALUE_LENGTH]; + public static byte[] generateData(final Random r, int length) { + byte [] b = new byte [length]; int i = 0; - for(i = 0; i < (VALUE_LENGTH-8); i += 8) { + for(i = 0; i < (length-8); i += 8) { b[i] = (byte) (65 + r.nextInt(26)); b[i+1] = b[i]; b[i+2] = b[i]; @@ -1181,7 +1246,7 @@ public class PerformanceEvaluation extends Configured implements Tool { } byte a = (byte) (65 + r.nextInt(26)); - for(; i < VALUE_LENGTH; i++) { + for(; i < length; i++) { b[i] = a; } return b; @@ -1192,16 +1257,16 @@ public class PerformanceEvaluation extends Configured implements Tool { } long runOneClient(final Class cmd, final int startRow, - final int perClientRunRows, final int totalRows, - boolean flushCommits, boolean writeToWAL, HConnection connection, - final Status status) + final int perClientRunRows, final int totalRows, + boolean flushCommits, boolean writeToWAL, boolean useTags, int noOfTags, + HConnection connection, final Status status) throws IOException { status.setStatus("Start " + cmd + " at offset " + startRow + " for " + perClientRunRows + " rows"); long totalElapsedTime = 0; TestOptions options = new TestOptions(startRow, perClientRunRows, - totalRows, N, tableName, flushCommits, writeToWAL, connection); + totalRows, N, tableName, flushCommits, writeToWAL, useTags, noOfTags, connection); final Test t; try { Constructor constructor = cmd.getDeclaredConstructor( @@ -1233,8 +1298,8 @@ public class PerformanceEvaluation extends Configured implements Tool { try { admin = new HBaseAdmin(getConf()); checkTable(admin); - runOneClient(cmd, 0, this.R, this.R, this.flushCommits, this.writeToWAL, this.connection, - status); + runOneClient(cmd, 0, this.R, this.R, this.flushCommits, this.writeToWAL, + this.useTags, this.noOfTags, this.connection, status); } catch (Exception e) { LOG.error("Failed", e); } @@ -1276,6 +1341,9 @@ public class PerformanceEvaluation extends Configured implements Tool { System.err .println(" inmemory Tries to keep the HFiles of the CF inmemory as far as possible. Not " + "guaranteed that reads are always served from inmemory. Default: false"); + System.err.println(" usetags Writes tags along with KVs. Use with HFile V3. Default : false"); + System.err + .println(" numoftags Specify the no of tags that would be needed. This works only if usetags is true."); System.err.println(); System.err.println(" Note: -D properties will be applied to the conf used. 
"); System.err.println(" For example: "); @@ -1383,6 +1451,18 @@ public class PerformanceEvaluation extends Configured implements Tool { this.connection = HConnectionManager.createConnection(getConf()); + final String useTags = "--usetags="; + if (cmd.startsWith(useTags)) { + this.useTags = Boolean.parseBoolean(cmd.substring(useTags.length())); + continue; + } + + final String noOfTags = "--nooftags="; + if (cmd.startsWith(noOfTags)) { + this.noOfTags = Integer.parseInt(cmd.substring(noOfTags.length())); + continue; + } + Class cmdClass = determineCommandClass(cmd); if (cmdClass != null) { getArgs(i + 1, args); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java index 6daa3896d71..2ebb5a49ee3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestRegionObserverInterface.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Durability; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.regionserver.HRegion; @@ -577,8 +578,10 @@ public class TestRegionObserverInterface { Configuration conf, FileSystem fs, Path path, byte[] family, byte[] qualifier) throws IOException { + HFileContext context = new HFileContext(); HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)) .withPath(fs, path) + .withFileContext(context) .create(); long now = System.currentTimeMillis(); try { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java index 0a5cc4059d4..b77bd964252 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.util.Bytes; import org.junit.AfterClass; @@ -82,10 +83,11 @@ public class TestHalfStoreFileReader { Configuration conf = TEST_UTIL.getConfiguration(); FileSystem fs = FileSystem.get(conf); CacheConfig cacheConf = new CacheConfig(conf); - + HFileContext meta = new HFileContext(); + meta.setBlocksize(1024); HFile.Writer w = HFile.getWriterFactory(conf, cacheConf) .withPath(fs, p) - .withBlockSize(1024) + .withFileContext(meta) .create(); // write some things. @@ -147,10 +149,11 @@ public class TestHalfStoreFileReader { Configuration conf = TEST_UTIL.getConfiguration(); FileSystem fs = FileSystem.get(conf); CacheConfig cacheConf = new CacheConfig(conf); - + HFileContext meta = new HFileContext(); + meta.setBlocksize(1024); HFile.Writer w = HFile.getWriterFactory(conf, cacheConf) .withPath(fs, p) - .withBlockSize(1024) + .withFileContext(meta) .create(); // write some things. 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java index db9c9192f2a..8a864eaa590 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestDataBlockEncoders.java @@ -31,9 +31,11 @@ import java.util.Random; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.LargeTests; import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.test.RedundantKVGenerator; import org.junit.Test; @@ -43,82 +45,98 @@ import org.junit.runners.Parameterized; import org.junit.runners.Parameterized.Parameters; /** - * Test all of the data block encoding algorithms for correctness. - * Most of the class generate data which will test different branches in code. + * Test all of the data block encoding algorithms for correctness. Most of the + * class generate data which will test different branches in code. */ @Category(LargeTests.class) @RunWith(Parameterized.class) public class TestDataBlockEncoders { - static int NUMBER_OF_KV = 10000; - static int NUM_RANDOM_SEEKS = 10000; - private static int ENCODED_DATA_OFFSET = - HConstants.HFILEBLOCK_HEADER_SIZE + DataBlockEncoding.ID_SIZE; + private static int NUMBER_OF_KV = 10000; + private static int NUM_RANDOM_SEEKS = 10000; + + private static int ENCODED_DATA_OFFSET = HConstants.HFILEBLOCK_HEADER_SIZE + + DataBlockEncoding.ID_SIZE; private RedundantKVGenerator generator = new RedundantKVGenerator(); private Random randomizer = new Random(42l); private final boolean includesMemstoreTS; + private final boolean includesTags; @Parameters public static Collection parameters() { - return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED; } - - public TestDataBlockEncoders(boolean includesMemstoreTS) { + public TestDataBlockEncoders(boolean includesMemstoreTS, boolean includesTag) { this.includesMemstoreTS = includesMemstoreTS; + this.includesTags = includesTag; } - - private HFileBlockEncodingContext getEncodingContext( - Compression.Algorithm algo, DataBlockEncoding encoding) { + + private HFileBlockEncodingContext getEncodingContext(Compression.Algorithm algo, + DataBlockEncoding encoding) { DataBlockEncoder encoder = encoding.getEncoder(); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTags); + meta.setCompressAlgo(algo); if (encoder != null) { - return encoder.newDataBlockEncodingContext(algo, encoding, - HConstants.HFILEBLOCK_DUMMY_HEADER); + return encoder.newDataBlockEncodingContext(encoding, + HConstants.HFILEBLOCK_DUMMY_HEADER, meta); } else { - return new HFileBlockDefaultEncodingContext(algo, encoding, HConstants.HFILEBLOCK_DUMMY_HEADER); + return new HFileBlockDefaultEncodingContext(encoding, + HConstants.HFILEBLOCK_DUMMY_HEADER, meta); } } - - private byte[] encodeBytes(DataBlockEncoding encoding, - ByteBuffer dataset) throws IOException { + + private byte[] encodeBytes(DataBlockEncoding encoding, ByteBuffer dataset) + throws 
IOException { DataBlockEncoder encoder = encoding.getEncoder(); - HFileBlockEncodingContext encodingCtx = - getEncodingContext(Compression.Algorithm.NONE, encoding); + HFileBlockEncodingContext encodingCtx = getEncodingContext(Compression.Algorithm.NONE, + encoding); - encoder.encodeKeyValues(dataset, includesMemstoreTS, - encodingCtx); + encoder.encodeKeyValues(dataset, encodingCtx); - byte[] encodedBytesWithHeader = - encodingCtx.getUncompressedBytesWithHeader(); - byte[] encodedData = - new byte[encodedBytesWithHeader.length - ENCODED_DATA_OFFSET]; - System.arraycopy(encodedBytesWithHeader, ENCODED_DATA_OFFSET, encodedData, - 0, encodedData.length); + byte[] encodedBytesWithHeader = encodingCtx.getUncompressedBytesWithHeader(); + byte[] encodedData = new byte[encodedBytesWithHeader.length - ENCODED_DATA_OFFSET]; + System.arraycopy(encodedBytesWithHeader, ENCODED_DATA_OFFSET, encodedData, 0, + encodedData.length); return encodedData; } - - private void testAlgorithm(ByteBuffer dataset, DataBlockEncoding encoding) - throws IOException { + + private void testAlgorithm(ByteBuffer dataset, DataBlockEncoding encoding, + List kvList) throws IOException { // encode byte[] encodedBytes = encodeBytes(encoding, dataset); - //decode + // decode ByteArrayInputStream bais = new ByteArrayInputStream(encodedBytes); DataInputStream dis = new DataInputStream(bais); ByteBuffer actualDataset; DataBlockEncoder encoder = encoding.getEncoder(); - actualDataset = encoder.decodeKeyValues(dis, includesMemstoreTS); - + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTags); + meta.setCompressAlgo(Compression.Algorithm.NONE); + actualDataset = encoder.decodeKeyValues(dis, encoder.newDataBlockDecodingContext(meta)); dataset.rewind(); actualDataset.rewind(); + // this is because in case of prefix tree the decoded stream will not have + // the + // mvcc in it. + // if (encoding != DataBlockEncoding.PREFIX_TREE) { assertEquals("Encoding -> decoding gives different results for " + encoder, Bytes.toStringBinary(dataset), Bytes.toStringBinary(actualDataset)); + // } } /** * Test data block encoding of empty KeyValue. - * @throws IOException On test failure. + * + * @throws IOException + * On test failure. */ @Test public void testEmptyKeyValues() throws IOException { @@ -127,15 +145,26 @@ public class TestDataBlockEncoders { byte[] family = new byte[0]; byte[] qualifier = new byte[0]; byte[] value = new byte[0]; - kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value)); - kvList.add(new KeyValue(row, family, qualifier, 0l, Type.Put, value)); - testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, - includesMemstoreTS)); + if (!includesTags) { + kvList.add(new KeyValue(row, family, qualifier, 0l, value)); + kvList.add(new KeyValue(row, family, qualifier, 0l, value)); + } else { + byte[] metaValue1 = Bytes.toBytes("metaValue1"); + byte[] metaValue2 = Bytes.toBytes("metaValue2"); + kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1, + metaValue1) })); + kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1, + metaValue2) })); + } + testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS), + kvList); } /** * Test KeyValues with negative timestamp. - * @throws IOException On test failure. + * + * @throws IOException + * On test failure. 
*/ @Test public void testNegativeTimestamps() throws IOException { @@ -144,13 +173,22 @@ public class TestDataBlockEncoders { byte[] family = new byte[0]; byte[] qualifier = new byte[0]; byte[] value = new byte[0]; - kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value)); - kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value)); - testEncodersOnDataset( - RedundantKVGenerator.convertKvToByteBuffer(kvList, - includesMemstoreTS)); + if (includesTags) { + byte[] metaValue1 = Bytes.toBytes("metaValue1"); + byte[] metaValue2 = Bytes.toBytes("metaValue2"); + kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1, + metaValue1) })); + kvList.add(new KeyValue(row, family, qualifier, 0l, value, new Tag[] { new Tag((byte) 1, + metaValue2) })); + } else { + kvList.add(new KeyValue(row, family, qualifier, -1l, Type.Put, value)); + kvList.add(new KeyValue(row, family, qualifier, -2l, Type.Put, value)); + } + testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS), + kvList); } + /** * Test whether compression -> decompression gives the consistent results on * pseudorandom sample. @@ -158,41 +196,42 @@ public class TestDataBlockEncoders { */ @Test public void testExecutionOnSample() throws IOException { - testEncodersOnDataset( - RedundantKVGenerator.convertKvToByteBuffer( - generator.generateTestKeyValues(NUMBER_OF_KV), - includesMemstoreTS)); + List kvList = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); + testEncodersOnDataset(RedundantKVGenerator.convertKvToByteBuffer(kvList, includesMemstoreTS), + kvList); } /** * Test seeking while file is encoded. */ @Test - public void testSeekingOnSample() throws IOException{ - List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV); - ByteBuffer originalBuffer = - RedundantKVGenerator.convertKvToByteBuffer(sampleKv, - includesMemstoreTS); + public void testSeekingOnSample() throws IOException { + List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); + ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv, + includesMemstoreTS); // create all seekers - List encodedSeekers = - new ArrayList(); + List encodedSeekers = new ArrayList(); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { if (encoding.getEncoder() == null) { continue; } - ByteBuffer encodedBuffer = - ByteBuffer.wrap(encodeBytes(encoding, originalBuffer)); + + ByteBuffer encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer)); DataBlockEncoder encoder = encoding.getEncoder(); - DataBlockEncoder.EncodedSeeker seeker = - encoder.createSeeker(KeyValue.COMPARATOR, includesMemstoreTS); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTags); + meta.setCompressAlgo(Compression.Algorithm.NONE); + DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, + encoder.newDataBlockDecodingContext(meta)); seeker.setCurrentBuffer(encodedBuffer); encodedSeekers.add(seeker); } - // test it! 
// try a few random seeks - for (boolean seekBefore : new boolean[] {false, true}) { + for (boolean seekBefore : new boolean[] { false, true }) { for (int i = 0; i < NUM_RANDOM_SEEKS; ++i) { int keyValueId; if (!seekBefore) { @@ -208,46 +247,46 @@ public class TestDataBlockEncoders { // check edge cases checkSeekingConsistency(encodedSeekers, false, sampleKv.get(0)); - for (boolean seekBefore : new boolean[] {false, true}) { - checkSeekingConsistency(encodedSeekers, seekBefore, - sampleKv.get(sampleKv.size() - 1)); + for (boolean seekBefore : new boolean[] { false, true }) { + checkSeekingConsistency(encodedSeekers, seekBefore, sampleKv.get(sampleKv.size() - 1)); KeyValue midKv = sampleKv.get(sampleKv.size() / 2); KeyValue lastMidKv = midKv.createLastOnRowCol(); checkSeekingConsistency(encodedSeekers, seekBefore, lastMidKv); } } - /** - * Test iterating on encoded buffers. - */ @Test public void testNextOnSample() { - List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV); - ByteBuffer originalBuffer = - RedundantKVGenerator.convertKvToByteBuffer(sampleKv, - includesMemstoreTS); + List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); + ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv, + includesMemstoreTS); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { if (encoding.getEncoder() == null) { continue; } + DataBlockEncoder encoder = encoding.getEncoder(); ByteBuffer encodedBuffer = null; try { encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer)); } catch (IOException e) { - throw new RuntimeException(String.format( - "Bug while encoding using '%s'", encoder.toString()), e); + throw new RuntimeException(String.format("Bug while encoding using '%s'", + encoder.toString()), e); } - DataBlockEncoder.EncodedSeeker seeker = - encoder.createSeeker(KeyValue.COMPARATOR, includesMemstoreTS); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTags); + meta.setCompressAlgo(Compression.Algorithm.NONE); + DataBlockEncoder.EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, + encoder.newDataBlockDecodingContext(meta)); seeker.setCurrentBuffer(encodedBuffer); int i = 0; do { KeyValue expectedKeyValue = sampleKv.get(i); ByteBuffer keyValue = seeker.getKeyValueBuffer(); - if (0 != Bytes.compareTo( - keyValue.array(), keyValue.arrayOffset(), keyValue.limit(), + if (0 != Bytes.compareTo(keyValue.array(), keyValue.arrayOffset(), keyValue.limit(), expectedKeyValue.getBuffer(), expectedKeyValue.getOffset(), expectedKeyValue.getLength())) { @@ -257,19 +296,16 @@ public class TestDataBlockEncoders { int leftOff = keyValue.arrayOffset(); int rightOff = expectedKeyValue.getOffset(); int length = Math.min(keyValue.limit(), expectedKeyValue.getLength()); - while (commonPrefix < length && - left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) { + while (commonPrefix < length + && left[commonPrefix + leftOff] == right[commonPrefix + rightOff]) { commonPrefix++; } - fail(String.format( - "next() produces wrong results " + - "encoder: %s i: %d commonPrefix: %d" + - "\n expected %s\n actual %s", - encoder.toString(), i, commonPrefix, - Bytes.toStringBinary(expectedKeyValue.getBuffer(), - expectedKeyValue.getOffset(), expectedKeyValue.getLength()), - Bytes.toStringBinary(keyValue))); + fail(String.format("next() produces wrong results " + + "encoder: %s i: %d commonPrefix: %d" + "\n expected %s\n actual %s", 
encoder + .toString(), i, commonPrefix, Bytes.toStringBinary(expectedKeyValue.getBuffer(), + expectedKeyValue.getOffset(), expectedKeyValue.getLength()), Bytes + .toStringBinary(keyValue))); } i++; } while (seeker.next()); @@ -281,10 +317,9 @@ public class TestDataBlockEncoders { */ @Test public void testFirstKeyInBlockOnSample() { - List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV); - ByteBuffer originalBuffer = - RedundantKVGenerator.convertKvToByteBuffer(sampleKv, - includesMemstoreTS); + List sampleKv = generator.generateTestKeyValues(NUMBER_OF_KV, includesTags); + ByteBuffer originalBuffer = RedundantKVGenerator.convertKvToByteBuffer(sampleKv, + includesMemstoreTS); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { if (encoding.getEncoder() == null) { @@ -295,39 +330,35 @@ public class TestDataBlockEncoders { try { encodedBuffer = ByteBuffer.wrap(encodeBytes(encoding, originalBuffer)); } catch (IOException e) { - throw new RuntimeException(String.format( - "Bug while encoding using '%s'", encoder.toString()), e); + throw new RuntimeException(String.format("Bug while encoding using '%s'", + encoder.toString()), e); } ByteBuffer keyBuffer = encoder.getFirstKeyInBlock(encodedBuffer); KeyValue firstKv = sampleKv.get(0); - if (0 != Bytes.compareTo( - keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(), - firstKv.getBuffer(), firstKv.getKeyOffset(), - firstKv.getKeyLength())) { + if (0 != Bytes.compareTo(keyBuffer.array(), keyBuffer.arrayOffset(), keyBuffer.limit(), + firstKv.getBuffer(), firstKv.getKeyOffset(), firstKv.getKeyLength())) { int commonPrefix = 0; int length = Math.min(keyBuffer.limit(), firstKv.getKeyLength()); - while (commonPrefix < length && - keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] == - firstKv.getBuffer()[firstKv.getKeyOffset() + commonPrefix]) { + while (commonPrefix < length + && keyBuffer.array()[keyBuffer.arrayOffset() + commonPrefix] == firstKv.getBuffer()[firstKv + .getKeyOffset() + commonPrefix]) { commonPrefix++; } - fail(String.format("Bug in '%s' commonPrefix %d", - encoder.toString(), commonPrefix)); + fail(String.format("Bug in '%s' commonPrefix %d", encoder.toString(), commonPrefix)); } } } - - private void checkSeekingConsistency( - List encodedSeekers, boolean seekBefore, - KeyValue keyValue) { + + private void checkSeekingConsistency(List encodedSeekers, + boolean seekBefore, KeyValue keyValue) { ByteBuffer expectedKeyValue = null; ByteBuffer expectedKey = null; ByteBuffer expectedValue = null; for (DataBlockEncoder.EncodedSeeker seeker : encodedSeekers) { - seeker.seekToKeyInBlock(keyValue.getBuffer(), - keyValue.getKeyOffset(), keyValue.getKeyLength(), seekBefore); + seeker.seekToKeyInBlock(keyValue.getBuffer(), keyValue.getKeyOffset(), + keyValue.getKeyLength(), seekBefore); seeker.rewind(); ByteBuffer actualKeyValue = seeker.getKeyValueBuffer(); @@ -353,9 +384,8 @@ public class TestDataBlockEncoders { } } } - - private void testEncodersOnDataset(ByteBuffer onDataset) - throws IOException{ + + private void testEncodersOnDataset(ByteBuffer onDataset, List kvList) throws IOException { ByteBuffer dataset = ByteBuffer.allocate(onDataset.capacity()); onDataset.rewind(); dataset.put(onDataset); @@ -366,11 +396,13 @@ public class TestDataBlockEncoders { if (encoding.getEncoder() == null) { continue; } - testAlgorithm(dataset, encoding); + + testAlgorithm(dataset, encoding, kvList); // ensure that dataset is unchanged dataset.rewind(); assertEquals("Input of two methods is changed", onDataset, dataset); } } 
+ } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java index 01fdeeeefdf..54054ffb408 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestEncodedSeekers.java @@ -29,10 +29,12 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.LruBlockCache; import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.regionserver.HRegion; @@ -68,6 +70,7 @@ public class TestEncodedSeekers { private final HBaseTestingUtility testUtil = HBaseTestingUtility.createLocalHTU(); private final DataBlockEncoding encoding; private final boolean encodeOnDisk; + private final boolean includeTags; /** Enable when debugging */ private static final boolean VERBOSE = false; @@ -76,21 +79,27 @@ public class TestEncodedSeekers { public static Collection parameters() { List paramList = new ArrayList(); for (DataBlockEncoding encoding : DataBlockEncoding.values()) { - for (boolean encodeOnDisk : new boolean[]{false, true}) { - paramList.add(new Object[] { encoding, encodeOnDisk }); + for (boolean includeTags : new boolean[] { false, true }) { + for (boolean encodeOnDisk : new boolean[] { false, true }) { + paramList.add(new Object[] { encoding, encodeOnDisk, includeTags }); + } } } return paramList; } - public TestEncodedSeekers(DataBlockEncoding encoding, boolean encodeOnDisk) { + public TestEncodedSeekers(DataBlockEncoding encoding, boolean encodeOnDisk, boolean includeTags) { this.encoding = encoding; this.encodeOnDisk = encodeOnDisk; + this.includeTags = includeTags; } @Test public void testEncodedSeeker() throws IOException { System.err.println("Testing encoded seekers for encoding " + encoding); + if(includeTags) { + testUtil.getConfiguration().setInt(HFile.FORMAT_VERSION_KEY, 3); + } LruBlockCache cache = (LruBlockCache)new CacheConfig(testUtil.getConfiguration()).getBlockCache(); cache.clearCache(); @@ -134,6 +143,11 @@ public class TestEncodedSeekers { byte[] col = Bytes.toBytes(String.valueOf(j)); byte[] value = dataGenerator.generateRandomSizeValue(key, col); put.add(CF_BYTES, col, value); + if(includeTags) { + Tag[] tag = new Tag[1]; + tag[0] = new Tag((byte)1, "Visibility"); + put.add(CF_BYTES, col, value, tag); + } if(VERBOSE){ KeyValue kvPut = new KeyValue(key, CF_BYTES, col, value); System.err.println(Strings.padFront(i+"", ' ', 4)+" "+kvPut); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java index b53596948d3..83d2470c0b1 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/encoding/TestPrefixTreeEncoding.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.io.encoding; import static org.junit.Assert.assertArrayEquals; import static 
org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.fail; @@ -27,6 +28,7 @@ import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collection; import java.util.List; import java.util.Random; import java.util.concurrent.ConcurrentSkipListSet; @@ -35,24 +37,30 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder.EncodedSeeker; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.CollectionBackedScanner; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; /** * Tests scanning/seeking data with PrefixTree Encoding. */ +@RunWith(Parameterized.class) @Category(SmallTests.class) public class TestPrefixTreeEncoding { - private static final Log LOG = LogFactory - .getLog(TestPrefixTreeEncoding.class); - static final String CF = "EncodingTestCF"; - static final byte[] CF_BYTES = Bytes.toBytes(CF); + private static final Log LOG = LogFactory.getLog(TestPrefixTreeEncoding.class); + private static final String CF = "EncodingTestCF"; + private static final byte[] CF_BYTES = Bytes.toBytes(CF); private static final int NUM_ROWS_PER_BATCH = 50; private static final int NUM_COLS_PER_ROW = 20; @@ -61,7 +69,21 @@ public class TestPrefixTreeEncoding { KeyValue.COMPARATOR); private static boolean formatRowNum = false; - + + @Parameters + public static Collection parameters() { + List paramList = new ArrayList(); + { + paramList.add(new Object[] { false }); + paramList.add(new Object[] { true }); + } + return paramList; + } + private final boolean includesTag; + public TestPrefixTreeEncoding(boolean includesTag) { + this.includesTag = includesTag; + } + @Before public void setUp() throws Exception { kvset.clear(); @@ -73,63 +95,74 @@ public class TestPrefixTreeEncoding { formatRowNum = true; PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; - ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, false); + ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, false, includesTag); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(false); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(Algorithm.NONE); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( - Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]); - encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx); - EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, false); + DataBlockEncoding.PREFIX_TREE, new byte[0], meta); + encoder.encodeKeyValues(dataBuffer, blkEncodingCtx); + EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, + encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); - ByteBuffer readBuffer = 
ByteBuffer.wrap(onDiskBytes, - DataBlockEncoding.ID_SIZE, onDiskBytes.length - - DataBlockEncoding.ID_SIZE); + ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, + onDiskBytes.length - DataBlockEncoding.ID_SIZE); seeker.setCurrentBuffer(readBuffer); // Seek before the first keyvalue; - KeyValue seekKey = KeyValue.createFirstDeleteFamilyOnRow( - getRowKey(batchId, 0), CF_BYTES); - seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), - seekKey.getKeyLength(), true); + KeyValue seekKey = KeyValue.createFirstDeleteFamilyOnRow(getRowKey(batchId, 0), CF_BYTES); + seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), seekKey.getKeyLength(), + true); assertEquals(null, seeker.getKeyValue()); // Seek before the middle keyvalue; - seekKey = KeyValue.createFirstDeleteFamilyOnRow( - getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), CF_BYTES); - seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), - seekKey.getKeyLength(), true); + seekKey = KeyValue.createFirstDeleteFamilyOnRow(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3), + CF_BYTES); + seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), seekKey.getKeyLength(), + true); assertNotNull(seeker.getKeyValue()); - assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1), seeker - .getKeyValue().getRow()); + assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH / 3 - 1), seeker.getKeyValue().getRow()); // Seek before the last keyvalue; - seekKey = KeyValue.createFirstDeleteFamilyOnRow(Bytes.toBytes("zzzz"), - CF_BYTES); - seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), - seekKey.getKeyLength(), true); + seekKey = KeyValue.createFirstDeleteFamilyOnRow(Bytes.toBytes("zzzz"), CF_BYTES); + seeker.seekToKeyInBlock(seekKey.getBuffer(), seekKey.getKeyOffset(), seekKey.getKeyLength(), + true); assertNotNull(seeker.getKeyValue()); - assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1), seeker - .getKeyValue().getRow()); + assertArrayEquals(getRowKey(batchId, NUM_ROWS_PER_BATCH - 1), seeker.getKeyValue().getRow()); } @Test public void testScanWithRandomData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); - ByteBuffer dataBuffer = generateRandomTestData(kvset, numBatchesWritten++); + ByteBuffer dataBuffer = generateRandomTestData(kvset, numBatchesWritten++, includesTag); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(false); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(Algorithm.NONE); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( - Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]); - encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx); - EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, false); - byte[] onDiskBytes=blkEncodingCtx.getOnDiskBytesWithHeader(); - ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, - DataBlockEncoding.ID_SIZE, onDiskBytes.length - - DataBlockEncoding.ID_SIZE); + DataBlockEncoding.PREFIX_TREE, new byte[0], meta); + encoder.encodeKeyValues(dataBuffer, blkEncodingCtx); + EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, + encoder.newDataBlockDecodingContext(meta)); + byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); + ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, + onDiskBytes.length - DataBlockEncoding.ID_SIZE); seeker.setCurrentBuffer(readBuffer); KeyValue previousKV = null; - do{ + do { KeyValue 
currentKV = seeker.getKeyValue(); + System.out.println(currentKV); if (previousKV != null && KeyValue.COMPARATOR.compare(currentKV, previousKV) < 0) { dumpInputKVSet(); - fail("Current kv " + currentKV + " is smaller than previous keyvalue " - + previousKV); + fail("Current kv " + currentKV + " is smaller than previous keyvalue " + previousKV); + } + if (!includesTag) { + assertFalse(currentKV.getTagsLength() > 0); + } else { + Assert.assertTrue(currentKV.getTagsLength() > 0); } previousKV = currentKV; } while (seeker.next()); @@ -139,15 +172,20 @@ public class TestPrefixTreeEncoding { public void testSeekWithRandomData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; - ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId); + ByteBuffer dataBuffer = generateRandomTestData(kvset, batchId, includesTag); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(false); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(Algorithm.NONE); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( - Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]); - encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx); - EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, false); + DataBlockEncoding.PREFIX_TREE, new byte[0], meta); + encoder.encodeKeyValues(dataBuffer, blkEncodingCtx); + EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, + encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); - ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, - DataBlockEncoding.ID_SIZE, onDiskBytes.length - - DataBlockEncoding.ID_SIZE); + ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, + onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); } @@ -155,19 +193,23 @@ public class TestPrefixTreeEncoding { public void testSeekWithFixedData() throws Exception { PrefixTreeCodec encoder = new PrefixTreeCodec(); int batchId = numBatchesWritten++; - ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId); + ByteBuffer dataBuffer = generateFixedTestData(kvset, batchId, includesTag); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(false); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(Algorithm.NONE); HFileBlockEncodingContext blkEncodingCtx = new HFileBlockDefaultEncodingContext( - Algorithm.NONE, DataBlockEncoding.PREFIX_TREE, new byte[0]); - encoder.encodeKeyValues(dataBuffer, false, blkEncodingCtx); + DataBlockEncoding.PREFIX_TREE, new byte[0], meta); + encoder.encodeKeyValues(dataBuffer, blkEncodingCtx); EncodedSeeker seeker = encoder.createSeeker(KeyValue.COMPARATOR, - false); + encoder.newDataBlockDecodingContext(meta)); byte[] onDiskBytes = blkEncodingCtx.getOnDiskBytesWithHeader(); - ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, - DataBlockEncoding.ID_SIZE, onDiskBytes.length - - DataBlockEncoding.ID_SIZE); + ByteBuffer readBuffer = ByteBuffer.wrap(onDiskBytes, DataBlockEncoding.ID_SIZE, + onDiskBytes.length - DataBlockEncoding.ID_SIZE); verifySeeking(seeker, readBuffer, batchId); } - + private void verifySeeking(EncodedSeeker encodeSeeker, ByteBuffer encodedData, int batchId) { List kvList = new ArrayList(); @@ -202,73 +244,93 @@ public class TestPrefixTreeEncoding { System.out.println(kv); } } - - private static ByteBuffer generateFixedTestData( 
- ConcurrentSkipListSet kvset, int batchId) throws Exception { - return generateFixedTestData(kvset, batchId, true); + + private static ByteBuffer generateFixedTestData(ConcurrentSkipListSet kvset, + int batchId, boolean useTags) throws Exception { + return generateFixedTestData(kvset, batchId, true, useTags); } - private static ByteBuffer generateFixedTestData( - ConcurrentSkipListSet kvset, int batchId, boolean partial) - throws Exception { + private static ByteBuffer generateFixedTestData(ConcurrentSkipListSet kvset, + int batchId, boolean partial, boolean useTags) throws Exception { ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { - if (partial && i / 10 % 2 == 1) continue; + if (partial && i / 10 % 2 == 1) + continue; for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { - KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, - getQualifier(j), getValue(batchId, i, j)); - kvset.add(kv); + if (!useTags) { + KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue( + batchId, i, j)); + kvset.add(kv); + } else { + KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l, + getValue(batchId, i, j), new Tag[] { new Tag((byte) 1, "metaValue1") }); + kvset.add(kv); + } } } for (KeyValue kv : kvset) { userDataStream.writeInt(kv.getKeyLength()); userDataStream.writeInt(kv.getValueLength()); - userDataStream - .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()); - userDataStream.write(kv.getBuffer(), kv.getValueOffset(), - kv.getValueLength()); + userDataStream.write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()); + userDataStream.write(kv.getBuffer(), kv.getValueOffset(), kv.getValueLength()); + if (useTags) { + userDataStream.writeShort(kv.getTagsLength()); + userDataStream.write(kv.getBuffer(), kv.getValueOffset() + kv.getValueLength() + + Bytes.SIZEOF_SHORT, kv.getTagsLength()); + } } return ByteBuffer.wrap(baosInMemory.toByteArray()); } - private static ByteBuffer generateRandomTestData( - ConcurrentSkipListSet kvset, int batchId) throws Exception { + private static ByteBuffer generateRandomTestData(ConcurrentSkipListSet kvset, + int batchId, boolean useTags) throws Exception { ByteArrayOutputStream baosInMemory = new ByteArrayOutputStream(); DataOutputStream userDataStream = new DataOutputStream(baosInMemory); Random random = new Random(); for (int i = 0; i < NUM_ROWS_PER_BATCH; ++i) { - if (random.nextInt(100) < 50) continue; + if (random.nextInt(100) < 50) + continue; for (int j = 0; j < NUM_COLS_PER_ROW; ++j) { - if (random.nextInt(100) < 50) continue; - KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, - getQualifier(j), getValue(batchId, i, j)); - kvset.add(kv); + if (random.nextInt(100) < 50) + continue; + if (!useTags) { + KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), getValue( + batchId, i, j)); + kvset.add(kv); + } else { + KeyValue kv = new KeyValue(getRowKey(batchId, i), CF_BYTES, getQualifier(j), 0l, + getValue(batchId, i, j), new Tag[] { new Tag((byte) 1, "metaValue1") }); + kvset.add(kv); + } } } + for (KeyValue kv : kvset) { userDataStream.writeInt(kv.getKeyLength()); userDataStream.writeInt(kv.getValueLength()); - userDataStream - .write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()); - userDataStream.write(kv.getBuffer(), kv.getValueOffset(), - kv.getValueLength()); + userDataStream.write(kv.getBuffer(), kv.getKeyOffset(), 
kv.getKeyLength()); + userDataStream.write(kv.getBuffer(), kv.getValueOffset(), kv.getValueLength()); + if (useTags) { + userDataStream.writeShort(kv.getTagsLength()); + userDataStream.write(kv.getBuffer(), kv.getValueOffset() + kv.getValueLength() + + Bytes.SIZEOF_SHORT, kv.getTagsLength()); + } } return ByteBuffer.wrap(baosInMemory.toByteArray()); } private static byte[] getRowKey(int batchId, int i) { - return Bytes.toBytes("batch" + batchId + "_row" - + (formatRowNum ? String.format("%04d", i) : i)); + return Bytes + .toBytes("batch" + batchId + "_row" + (formatRowNum ? String.format("%04d", i) : i)); } private static byte[] getQualifier(int j) { - return Bytes.toBytes("col" + j); + return Bytes.toBytes("colfdfafhfhsdfhsdfh" + j); } private static byte[] getValue(int batchId, int i, int j) { - return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) - + "_col" + j); + return Bytes.toBytes("value_for_" + Bytes.toString(getRowKey(batchId, i)) + "_col" + j); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java index c9125a7dc25..adb5bd94220 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/CacheTestUtils.java @@ -37,6 +37,8 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.MultithreadedTestUtil; import org.apache.hadoop.hbase.MultithreadedTestUtil.TestThread; import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; import org.apache.hadoop.hbase.util.ChecksumType; @@ -339,13 +341,18 @@ public class CacheTestUtils { cachedBuffer.putInt(uncompressedSizeWithoutHeader); cachedBuffer.putLong(prevBlockOffset); cachedBuffer.rewind(); - + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(false); + meta.setCompressAlgo(Compression.Algorithm.NONE); + meta.setBytesPerChecksum(0); + meta.setChecksumType(ChecksumType.NULL); HFileBlock generated = new HFileBlock(BlockType.DATA, onDiskSizeWithoutHeader, uncompressedSizeWithoutHeader, prevBlockOffset, cachedBuffer, HFileBlock.DONT_FILL_HEADER, - blockSize, includesMemstoreTS, HFileBlock.MINOR_VERSION_NO_CHECKSUM, - 0, ChecksumType.NULL.getCode(), - onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE); + blockSize, + onDiskSizeWithoutHeader + HConstants.HFILEBLOCK_HEADER_SIZE, meta); String strKey; /* No conflicting keys */ diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TagUsage.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TagUsage.java new file mode 100644 index 00000000000..747d6403eea --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TagUsage.java @@ -0,0 +1,31 @@ + /* + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.io.hfile; +/** + * Used in testcases only. + */ +public enum TagUsage { + // No tags would be added + NO_TAG, + // KVs with tags + ONLY_TAG, + // kvs with and without tags + PARTIAL_TAG; + +} \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java index 776928391a6..97c0465fad6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java @@ -40,13 +40,14 @@ import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.StoreFile; -import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ChecksumType; @@ -183,6 +184,7 @@ public class TestCacheOnWrite { @Before public void setUp() throws IOException { conf = TEST_UTIL.getConfiguration(); + this.conf.set("dfs.datanode.data.dir.perm", "700"); conf.setInt(HFile.FORMAT_VERSION_KEY, HFile.MAX_FORMAT_VERSION); conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, INDEX_BLOCK_SIZE); conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, @@ -207,13 +209,24 @@ public class TestCacheOnWrite { @Test public void testStoreFileCacheOnWrite() throws IOException { - writeStoreFile(); - readStoreFile(); + testStoreFileCacheOnWriteInternals(false); + testStoreFileCacheOnWriteInternals(true); } - private void readStoreFile() throws IOException { - HFileReaderV2 reader = (HFileReaderV2) HFile.createReaderWithEncoding(fs, - storeFilePath, cacheConf, encoder.getEncodingInCache()); + protected void testStoreFileCacheOnWriteInternals(boolean useTags) throws IOException { + writeStoreFile(useTags); + readStoreFile(useTags); + } + + private void readStoreFile(boolean useTags) throws IOException { + AbstractHFileReader reader; + if (useTags) { + reader = (HFileReaderV3) HFile.createReaderWithEncoding(fs, storeFilePath, cacheConf, + encoder.getEncodingInCache()); + } else { + reader = (HFileReaderV2) HFile.createReaderWithEncoding(fs, storeFilePath, cacheConf, + encoder.getEncodingInCache()); + } LOG.info("HFile information: " + reader); final boolean cacheBlocks = false; final boolean pread = false; @@ -260,10 +273,13 @@ public class TestCacheOnWrite { String countByType = blockCountByType.toString(); BlockType cachedDataBlockType = encoderType.encodeInCache ? 
BlockType.ENCODED_DATA : BlockType.DATA; - assertEquals("{" + cachedDataBlockType - + "=1379, LEAF_INDEX=154, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=18}", - countByType); - + if (useTags) { + assertEquals("{" + cachedDataBlockType + + "=1550, LEAF_INDEX=173, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=20}", countByType); + } else { + assertEquals("{" + cachedDataBlockType + + "=1379, LEAF_INDEX=154, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=18}", countByType); + } reader.close(); } @@ -283,33 +299,54 @@ public class TestCacheOnWrite { } } - public void writeStoreFile() throws IOException { + public void writeStoreFile(boolean useTags) throws IOException { + if(useTags) { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } else { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2); + } Path storeFileParentDir = new Path(TEST_UTIL.getDataTestDir(), "test_cache_on_write"); - StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs, - DATA_BLOCK_SIZE) - .withOutputDir(storeFileParentDir) - .withCompression(compress) - .withDataBlockEncoder(encoder) - .withComparator(KeyValue.COMPARATOR) - .withBloomType(BLOOM_TYPE) - .withMaxKeyCount(NUM_KV) - .withChecksumType(CKTYPE) - .withBytesPerChecksum(CKBYTES) - .build(); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(compress); + meta.setChecksumType(CKTYPE); + meta.setBytesPerChecksum(CKBYTES); + meta.setBlocksize(DATA_BLOCK_SIZE); + meta.setEncodingInCache(encoder.getEncodingInCache()); + meta.setEncodingOnDisk(encoder.getEncodingOnDisk()); + StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, cacheConf, fs) + .withOutputDir(storeFileParentDir).withComparator(KeyValue.COMPARATOR) + .withFileContext(meta) + .withBloomType(BLOOM_TYPE).withMaxKeyCount(NUM_KV).build(); final int rowLen = 32; for (int i = 0; i < NUM_KV; ++i) { byte[] k = TestHFileWriterV2.randomOrderedKey(rand, i); byte[] v = TestHFileWriterV2.randomValue(rand); int cfLen = rand.nextInt(k.length - rowLen + 1); - KeyValue kv = new KeyValue( + KeyValue kv; + if(useTags) { + Tag t = new Tag((byte) 1, "visibility"); + List tagList = new ArrayList(); + tagList.add(t); + Tag[] tags = new Tag[1]; + tags[0] = t; + kv = new KeyValue( + k, 0, rowLen, + k, rowLen, cfLen, + k, rowLen + cfLen, k.length - rowLen - cfLen, + rand.nextLong(), + generateKeyType(rand), + v, 0, v.length, tagList); + } else { + kv = new KeyValue( k, 0, rowLen, k, rowLen, cfLen, k, rowLen + cfLen, k.length - rowLen - cfLen, rand.nextLong(), generateKeyType(rand), v, 0, v.length); + } sfw.append(kv); } @@ -319,6 +356,16 @@ public class TestCacheOnWrite { @Test public void testNotCachingDataBlocksDuringCompaction() throws IOException { + testNotCachingDataBlocksDuringCompactionInternals(false); + testNotCachingDataBlocksDuringCompactionInternals(true); + } + + protected void testNotCachingDataBlocksDuringCompactionInternals(boolean useTags) throws IOException { + if (useTags) { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } else { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 2); + } // TODO: need to change this test if we add a cache size threshold for // compactions, or if we implement some other kind of intelligent logic for // deciding what blocks to cache-on-write on compaction. 
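For orientation, a minimal sketch (not part of the patch) of the two patterns these test hunks keep repeating: building a cell that carries a Tag, and describing tag/MVCC expectations through the new HFileContext instead of long constructor argument lists. The class name, row/family/qualifier/value bytes, and the (byte) 1 "visibility" tag below are placeholder test data echoing the fixtures in these tests, and the calls assume the tag-aware Put.add overload, the KeyValue constructor taking Tag[], and the HFileContext setters introduced by this change set.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.util.Bytes;

public class TaggedCellSketch {
  public static void main(String[] args) {
    byte[] row = Bytes.toBytes("row1");   // placeholder test data
    byte[] cf = Bytes.toBytes("f");
    byte[] qual = Bytes.toBytes("q");
    byte[] value = Bytes.toBytes("v");

    // A Tag pairs a type byte with a payload; (byte) 1 and "visibility"
    // mirror the arbitrary fixture values used in these tests.
    Tag[] tags = new Tag[] { new Tag((byte) 1, "visibility") };

    // Tag-aware Put overload exercised by TestCacheOnWrite and TestEncodedSeekers.
    Put p = new Put(row);
    p.add(cf, qual, 1L, value, tags);

    // The encoding tests build tagged cells directly as KeyValues.
    KeyValue kv = new KeyValue(row, cf, qual, 0L, value, tags);

    // Block writers and readers learn whether tags (and MVCC timestamps)
    // are present from an HFileContext.
    HFileContext meta = new HFileContext();
    meta.setIncludesMvcc(false);
    meta.setIncludesTags(true);
    meta.setCompressAlgo(Compression.Algorithm.NONE);

    System.out.println(kv + " tags=" + kv.getTagsLength());
  }
}

The tests drive this same pattern through a useTags/includesTag parameter so every case runs both with and without tags, bumping hfile.format.version to 3 whenever tags are enabled.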
@@ -347,8 +394,14 @@ public class TestCacheOnWrite { String qualStr = "col" + iCol; String valueStr = "value_" + rowStr + "_" + qualStr; for (int iTS = 0; iTS < 5; ++iTS) { - p.add(cfBytes, Bytes.toBytes(qualStr), ts++, - Bytes.toBytes(valueStr)); + if (useTags) { + Tag t = new Tag((byte) 1, "visibility"); + Tag[] tags = new Tag[1]; + tags[0] = t; + p.add(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr), tags); + } else { + p.add(cfBytes, Bytes.toBytes(qualStr), ts++, Bytes.toBytes(valueStr)); + } } } region.put(p); @@ -369,6 +422,5 @@ public class TestCacheOnWrite { region.close(); blockCache.shutdown(); } - } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java index ec32382bb26..c0a0b701cfe 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestChecksum.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.util.ChecksumType; @@ -79,6 +80,11 @@ public class TestChecksum { */ @Test public void testChecksumCorruption() throws IOException { + testChecksumCorruptionInternals(false); + testChecksumCorruptionInternals(true); + } + + protected void testChecksumCorruptionInternals(boolean useTags) throws IOException { for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { for (boolean pread : new boolean[] { false, true }) { LOG.info("testChecksumCorruption: Compression algorithm: " + algo + @@ -86,9 +92,13 @@ public class TestChecksum { Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + algo); FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, - true, HFile.DEFAULT_CHECKSUM_TYPE, - HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(true); + meta.setIncludesTags(useTags); + meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE); + meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta); long totalSize = 0; for (int blockId = 0; blockId < 2; ++blockId) { DataOutputStream dos = hbw.startWriting(BlockType.DATA); @@ -104,8 +114,12 @@ public class TestChecksum { // Do a read that purposely introduces checksum verification failures. 
FSDataInputStreamWrapper is = new FSDataInputStreamWrapper(fs, path); - HFileBlock.FSReader hbr = new FSReaderV2Test(is, algo, - totalSize, HFile.MAX_FORMAT_VERSION, fs, path); + meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(true); + meta.setIncludesTags(useTags); + meta.setUsesHBaseChecksum(true); + HFileBlock.FSReader hbr = new FSReaderV2Test(is, totalSize, fs, path, meta); HFileBlock b = hbr.readBlockData(0, -1, -1, pread); b.sanityCheck(); assertEquals(4936, b.getUncompressedSizeWithoutHeader()); @@ -147,8 +161,7 @@ public class TestChecksum { HFileSystem newfs = new HFileSystem(TEST_UTIL.getConfiguration(), false); assertEquals(false, newfs.useHBaseChecksum()); is = new FSDataInputStreamWrapper(newfs, path); - hbr = new FSReaderV2Test(is, algo, - totalSize, HFile.MAX_FORMAT_VERSION, newfs, path); + hbr = new FSReaderV2Test(is, totalSize, newfs, path, meta); b = hbr.readBlockData(0, -1, -1, pread); is.close(); b.sanityCheck(); @@ -173,14 +186,26 @@ public class TestChecksum { */ @Test public void testChecksumChunks() throws IOException { + testChecksumInternals(false); + testChecksumInternals(true); + } + + protected void testChecksumInternals(boolean useTags) throws IOException { Compression.Algorithm algo = NONE; for (boolean pread : new boolean[] { false, true }) { for (int bytesPerChecksum : BYTES_PER_CHECKSUM) { Path path = new Path(TEST_UTIL.getDataTestDir(), "checksumChunk_" + algo + bytesPerChecksum); FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, - true, HFile.DEFAULT_CHECKSUM_TYPE, bytesPerChecksum); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(true); + meta.setIncludesTags(useTags); + meta.setUsesHBaseChecksum(true); + meta.setBytesPerChecksum(bytesPerChecksum); + meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE); + HFileBlock.Writer hbw = new HFileBlock.Writer(null, + meta); // write one block. The block has data // that is at least 6 times more than the checksum chunk size @@ -211,8 +236,14 @@ public class TestChecksum { // Read data back from file. FSDataInputStream is = fs.open(path); FSDataInputStream nochecksum = hfs.getNoChecksumFs().open(path); + meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(true); + meta.setIncludesTags(useTags); + meta.setUsesHBaseChecksum(true); + meta.setBytesPerChecksum(bytesPerChecksum); HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper( - is, nochecksum), algo, totalSize, HFile.MAX_FORMAT_VERSION, hfs, path); + is, nochecksum), totalSize, hfs, path, meta); HFileBlock b = hbr.readBlockData(0, -1, -1, pread); is.close(); b.sanityCheck(); @@ -257,9 +288,9 @@ public class TestChecksum { * checksum validations. 
*/ static private class FSReaderV2Test extends HFileBlock.FSReaderV2 { - public FSReaderV2Test(FSDataInputStreamWrapper istream, Algorithm algo, long fileSize, - int minorVersion, FileSystem fs,Path path) throws IOException { - super(istream, algo, fileSize, minorVersion, (HFileSystem)fs, path); + public FSReaderV2Test(FSDataInputStreamWrapper istream, long fileSize, FileSystem fs, + Path path, HFileContext meta) throws IOException { + super(istream, fileSize, (HFileSystem) fs, path, meta); } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java index 65bd9b48a8f..96050a2a5d4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestFixedFileTrailer.java @@ -28,6 +28,7 @@ import java.util.Collection; import java.util.List; import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.util.Bytes; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -49,12 +50,13 @@ import org.apache.hadoop.fs.Path; public class TestFixedFileTrailer { private static final Log LOG = LogFactory.getLog(TestFixedFileTrailer.class); + private static final int MAX_COMPARATOR_NAME_LENGTH = 128; /** * The number of used fields by version. Indexed by version minus two. * Min version that we support is V2 */ - private static final int[] NUM_FIELDS_BY_VERSION = new int[] { 14 }; + private static final int[] NUM_FIELDS_BY_VERSION = new int[] { 14, 14 }; private HBaseTestingUtility util = new HBaseTestingUtility(); private FileSystem fs; @@ -86,7 +88,7 @@ public class TestFixedFileTrailer { @Test public void testTrailer() throws IOException { FixedFileTrailer t = new FixedFileTrailer(version, - HFileBlock.MINOR_VERSION_NO_CHECKSUM); + HFileReaderV2.PBUF_TRAILER_MINOR_VERSION); t.setDataIndexCount(3); t.setEntryCount(((long) Integer.MAX_VALUE) + 1); @@ -119,7 +121,7 @@ public class TestFixedFileTrailer { { DataInputStream dis = new DataInputStream(bais); FixedFileTrailer t2 = new FixedFileTrailer(version, - HFileBlock.MINOR_VERSION_NO_CHECKSUM); + HFileReaderV2.PBUF_TRAILER_MINOR_VERSION); t2.deserialize(dis); assertEquals(-1, bais.read()); // Ensure we have read everything. checkLoadedTrailer(version, t, t2); @@ -163,6 +165,68 @@ public class TestFixedFileTrailer { trailerStr.split(", ").length); assertEquals(trailerStr, t4.toString()); } + + @Test + public void testTrailerForV2NonPBCompatibility() throws Exception { + if (version == 2) { + FixedFileTrailer t = new FixedFileTrailer(version, + HFileReaderV2.MINOR_VERSION_NO_CHECKSUM); + t.setDataIndexCount(3); + t.setEntryCount(((long) Integer.MAX_VALUE) + 1); + t.setLastDataBlockOffset(291); + t.setNumDataIndexLevels(3); + t.setComparatorClass(KeyValue.COMPARATOR.getClass()); + t.setFirstDataBlockOffset(9081723123L); // Completely unrealistic. + t.setUncompressedDataIndexSize(827398717L); // Something random. + t.setLoadOnOpenOffset(128); + t.setMetaIndexCount(7); + t.setTotalUncompressedBytes(129731987); + + { + DataOutputStream dos = new DataOutputStream(baos); // Limited scope. 
+ serializeAsWritable(dos, t); + dos.flush(); + assertEquals(FixedFileTrailer.getTrailerSize(version), dos.size()); + } + + byte[] bytes = baos.toByteArray(); + baos.reset(); + assertEquals(bytes.length, FixedFileTrailer.getTrailerSize(version)); + + ByteArrayInputStream bais = new ByteArrayInputStream(bytes); + { + DataInputStream dis = new DataInputStream(bais); + FixedFileTrailer t2 = new FixedFileTrailer(version, + HFileReaderV2.MINOR_VERSION_NO_CHECKSUM); + t2.deserialize(dis); + assertEquals(-1, bais.read()); // Ensure we have read everything. + checkLoadedTrailer(version, t, t2); + } + } + } + + // Copied from FixedFileTrailer for testing the reading part of + // FixedFileTrailer of non PB + // serialized FFTs. + private void serializeAsWritable(DataOutputStream output, FixedFileTrailer fft) + throws IOException { + BlockType.TRAILER.write(output); + output.writeLong(fft.getFileInfoOffset()); + output.writeLong(fft.getLoadOnOpenDataOffset()); + output.writeInt(fft.getDataIndexCount()); + output.writeLong(fft.getUncompressedDataIndexSize()); + output.writeInt(fft.getMetaIndexCount()); + output.writeLong(fft.getTotalUncompressedBytes()); + output.writeLong(fft.getEntryCount()); + output.writeInt(fft.getCompressionCodec().ordinal()); + output.writeInt(fft.getNumDataIndexLevels()); + output.writeLong(fft.getFirstDataBlockOffset()); + output.writeLong(fft.getLastDataBlockOffset()); + Bytes.writeStringFixedSize(output, fft.getComparatorClassName(), MAX_COMPARATOR_NAME_LENGTH); + output.writeInt(FixedFileTrailer.materializeVersion(fft.getMajorVersion(), + fft.getMinorVersion())); + } + private FixedFileTrailer readTrailer(Path trailerPath) throws IOException { FSDataInputStream fsdis = fs.open(trailerPath); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java index e229a141b82..24ac455b1d9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hbase.HBaseTestCase; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.HFile.Reader; import org.apache.hadoop.hbase.io.hfile.HFile.Writer; @@ -82,8 +83,10 @@ public class TestHFile extends HBaseTestCase { public void testEmptyHFile() throws IOException { if (cacheConf == null) cacheConf = new CacheConfig(conf); Path f = new Path(ROOT_DIR, getName()); + HFileContext context = new HFileContext(); + context.setIncludesTags(false); Writer w = - HFile.getWriterFactory(conf, cacheConf).withPath(fs, f).create(); + HFile.getWriterFactory(conf, cacheConf).withPath(fs, f).withFileContext(context).create(); w.close(); Reader r = HFile.createReader(fs, f, cacheConf); r.loadFileInfo(); @@ -130,8 +133,10 @@ public class TestHFile extends HBaseTestCase { public void testCorruptTruncatedHFile() throws IOException { if (cacheConf == null) cacheConf = new CacheConfig(conf); Path f = new Path(ROOT_DIR, getName()); - Writer w = HFile.getWriterFactory(conf, cacheConf).withPath(this.fs, f).create(); - writeSomeRecords(w, 0, 100); + HFileContext context = new HFileContext(); + Writer w = HFile.getWriterFactory(conf, cacheConf).withPath(this.fs, f) + .withFileContext(context).create(); + 
writeSomeRecords(w, 0, 100, false); w.close(); Path trunc = new Path(f.getParent(), "trucated"); @@ -148,12 +153,17 @@ public class TestHFile extends HBaseTestCase { // write some records into the tfile // write them twice - private int writeSomeRecords(Writer writer, int start, int n) + private int writeSomeRecords(Writer writer, int start, int n, boolean useTags) throws IOException { String value = "value"; for (int i = start; i < (start + n); i++) { String key = String.format(localFormatter, Integer.valueOf(i)); - writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key)); + if (useTags) { + Tag t = new Tag((byte) 1, "myTag1"); + writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key), t.getBuffer()); + } else { + writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key)); + } } return (start + n); } @@ -192,8 +202,8 @@ public class TestHFile extends HBaseTestCase { return String.format(localFormatter, Integer.valueOf(rowId)).getBytes(); } - private void writeRecords(Writer writer) throws IOException { - writeSomeRecords(writer, 0, 100); + private void writeRecords(Writer writer, boolean useTags) throws IOException { + writeSomeRecords(writer, 0, 100, useTags); writer.close(); } @@ -205,20 +215,26 @@ public class TestHFile extends HBaseTestCase { /** * test none codecs + * @param useTags */ - void basicWithSomeCodec(String codec) throws IOException { + void basicWithSomeCodec(String codec, boolean useTags) throws IOException { + if (useTags) { + conf.setInt("hfile.format.version", 3); + } if (cacheConf == null) cacheConf = new CacheConfig(conf); - Path ncTFile = new Path(ROOT_DIR, "basic.hfile." + codec.toString()); + Path ncTFile = new Path(ROOT_DIR, "basic.hfile." + codec.toString() + useTags); FSDataOutputStream fout = createFSOutput(ncTFile); + HFileContext meta = new HFileContext(); + meta.setBlocksize(minBlockSize); + meta.setCompressAlgo(AbstractHFileWriter.compressionByName(codec)); Writer writer = HFile.getWriterFactory(conf, cacheConf) .withOutputStream(fout) - .withBlockSize(minBlockSize) - .withCompression(codec) + .withFileContext(meta) // NOTE: This test is dependent on this deprecated nonstandard comparator .withComparator(new KeyValue.RawBytesComparator()) .create(); LOG.info(writer); - writeRecords(writer); + writeRecords(writer, useTags); fout.close(); FSDataInputStream fin = fs.open(ncTFile); Reader reader = HFile.createReaderFromStream(ncTFile, fs.open(ncTFile), @@ -250,8 +266,13 @@ public class TestHFile extends HBaseTestCase { } public void testTFileFeatures() throws IOException { - basicWithSomeCodec("none"); - basicWithSomeCodec("gz"); + testTFilefeaturesInternals(false); + testTFilefeaturesInternals(true); + } + + protected void testTFilefeaturesInternals(boolean useTags) throws IOException { + basicWithSomeCodec("none", useTags); + basicWithSomeCodec("gz", useTags); } private void writeNumMetablocks(Writer writer, int n) { @@ -292,10 +313,12 @@ public class TestHFile extends HBaseTestCase { if (cacheConf == null) cacheConf = new CacheConfig(conf); Path mFile = new Path(ROOT_DIR, "meta.hfile"); FSDataOutputStream fout = createFSOutput(mFile); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(AbstractHFileWriter.compressionByName(compress)); + meta.setBlocksize(minBlockSize); Writer writer = HFile.getWriterFactory(conf, cacheConf) .withOutputStream(fout) - .withBlockSize(minBlockSize) - .withCompression(compress) + .withFileContext(meta) .create(); someTestingWithMetaBlock(writer); writer.close(); @@ -324,10 +347,12 @@ public class 
TestHFile extends HBaseTestCase { HBaseTestingUtility.COMPRESSION_ALGORITHMS) { Path mFile = new Path(ROOT_DIR, "nometa_" + compressAlgo + ".hfile"); FSDataOutputStream fout = createFSOutput(mFile); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo((compressAlgo)); + meta.setBlocksize(minBlockSize); Writer writer = HFile.getWriterFactory(conf, cacheConf) .withOutputStream(fout) - .withBlockSize(minBlockSize) - .withCompression(compressAlgo) + .withFileContext(meta) .create(); writer.append("foo".getBytes(), "value".getBytes()); writer.close(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java index 98a60a6e66f..f12fc6061bc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlock.java @@ -18,7 +18,11 @@ */ package org.apache.hadoop.hbase.io.hfile; -import static org.junit.Assert.*; +import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.GZ; +import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.NONE; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; import java.io.DataOutputStream; @@ -33,6 +37,7 @@ import java.util.List; import java.util.Map; import java.util.Random; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.Executors; @@ -46,8 +51,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.DoubleOutputStream; import org.apache.hadoop.hbase.io.compress.Compression; @@ -61,9 +67,6 @@ import org.apache.hadoop.hbase.util.ChecksumType; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.io.WritableUtils; import org.apache.hadoop.io.compress.Compressor; - -import static org.apache.hadoop.hbase.io.compress.Compression.Algorithm.*; - import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -97,14 +100,15 @@ public class TestHFileBlock { private int uncompressedSizeV1; private final boolean includesMemstoreTS; - - public TestHFileBlock(boolean includesMemstoreTS) { + private final boolean includesTag; + public TestHFileBlock(boolean includesMemstoreTS, boolean includesTag) { this.includesMemstoreTS = includesMemstoreTS; + this.includesTag = includesTag; } @Parameters public static Collection parameters() { - return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED; } @Before @@ -118,7 +122,7 @@ public class TestHFileBlock { dos.writeInt(i / 100); } - static int writeTestKeyValues(OutputStream dos, int seed, boolean includesMemstoreTS) + static int writeTestKeyValues(OutputStream dos, int seed, boolean includesMemstoreTS, boolean useTag) throws IOException { List keyValues = new ArrayList(); Random randomizer = new Random(42l + seed); // just any fixed number @@ 
-163,24 +167,37 @@ public class TestHFileBlock { } else { timestamp = randomizer.nextLong(); } - - keyValues.add(new KeyValue(row, family, qualifier, timestamp, value)); + if (!useTag) { + keyValues.add(new KeyValue(row, family, qualifier, timestamp, value)); + } else { + keyValues.add(new KeyValue(row, family, qualifier, timestamp, value, new Tag[] { new Tag( + (byte) 1, Bytes.toBytes("myTagVal")) })); + } } // sort it and write to stream int totalSize = 0; - Collections.sort(keyValues, KeyValue.COMPARATOR); + Collections.sort(keyValues, KeyValue.COMPARATOR); DataOutputStream dataOutputStream = new DataOutputStream(dos); + for (KeyValue kv : keyValues) { + dataOutputStream.writeInt(kv.getKeyLength()); + dataOutputStream.writeInt(kv.getValueLength()); + dataOutputStream.write(kv.getBuffer(), kv.getKeyOffset(), kv.getKeyLength()); + dataOutputStream.write(kv.getBuffer(), kv.getValueOffset(), kv.getValueLength()); + // Write the additonal tag into the stream + // always write the taglength totalSize += kv.getLength(); - dataOutputStream.write(kv.getBuffer(), kv.getOffset(), kv.getLength()); + if (useTag) { + dataOutputStream.writeShort(kv.getTagsLength()); + dataOutputStream.write(kv.getBuffer(), kv.getTagsOffset(), kv.getTagsLength()); + } if (includesMemstoreTS) { long memstoreTS = randomizer.nextLong(); WritableUtils.writeVLong(dataOutputStream, memstoreTS); totalSize += WritableUtils.getVIntSize(memstoreTS); } } - return totalSize; } @@ -199,11 +216,15 @@ public class TestHFileBlock { } static HFileBlock.Writer createTestV2Block(Compression.Algorithm algo, - boolean includesMemstoreTS) throws IOException { + boolean includesMemstoreTS, boolean includesTag) throws IOException { final BlockType blockType = BlockType.DATA; - HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, - includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE, - HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); + meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE); + HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta); DataOutputStream dos = hbw.startWriting(blockType); writeTestBlockContents(dos); dos.flush(); @@ -214,8 +235,8 @@ public class TestHFileBlock { } public String createTestBlockStr(Compression.Algorithm algo, - int correctLength) throws IOException { - HFileBlock.Writer hbw = createTestV2Block(algo, includesMemstoreTS); + int correctLength, boolean useTag) throws IOException { + HFileBlock.Writer hbw = createTestV2Block(algo, includesMemstoreTS, useTag); byte[] testV2Block = hbw.getHeaderAndDataForTest(); int osOffset = HConstants.HFILEBLOCK_HEADER_SIZE + 9; if (testV2Block.length == correctLength) { @@ -231,7 +252,7 @@ public class TestHFileBlock { @Test public void testNoCompression() throws IOException { - assertEquals(4000, createTestV2Block(NONE, includesMemstoreTS). + assertEquals(4000, createTestV2Block(NONE, includesMemstoreTS, false). 
getBlockForCaching().getUncompressedSizeWithoutHeader()); } @@ -257,7 +278,7 @@ public class TestHFileBlock { + "\\xD46\\xEA5\\xEA3\\xEA7\\xE7\\x00LI\\x5Cs\\xA0\\x0F\\x00\\x00" + "\\x00\\x00\\x00\\x00"; // 4 byte checksum (ignored) final int correctGzipBlockLength = 95; - final String testBlockStr = createTestBlockStr(GZ, correctGzipBlockLength); + final String testBlockStr = createTestBlockStr(GZ, correctGzipBlockLength, false); // We ignore the block checksum because createTestBlockStr can change the // gzip header after the block is produced assertEquals(correctTestBlockStr.substring(0, correctGzipBlockLength - 4), @@ -266,6 +287,13 @@ public class TestHFileBlock { @Test public void testReaderV2() throws IOException { + testReaderV2Internals(); + } + + protected void testReaderV2Internals() throws IOException { + if(includesTag) { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { for (boolean pread : new boolean[] { false, true }) { LOG.info("testReaderV2: Compression algorithm: " + algo + @@ -273,9 +301,14 @@ public class TestHFileBlock { Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + algo); FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(algo, null, - includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE, - HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); + meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE); + HFileBlock.Writer hbw = new HFileBlock.Writer(null, + meta); long totalSize = 0; for (int blockId = 0; blockId < 2; ++blockId) { DataOutputStream dos = hbw.startWriting(BlockType.DATA); @@ -287,8 +320,12 @@ public class TestHFileBlock { os.close(); FSDataInputStream is = fs.open(path); - HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo, - totalSize); + meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(algo); + HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, totalSize, meta); HFileBlock b = hbr.readBlockData(0, -1, -1, pread); is.close(); assertEquals(0, HFile.getChecksumFailuresCount()); @@ -301,7 +338,7 @@ public class TestHFileBlock { if (algo == GZ) { is = fs.open(path); - hbr = new HFileBlock.FSReaderV2(is, algo, totalSize); + hbr = new HFileBlock.FSReaderV2(is, totalSize, meta); b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE + b.totalChecksumBytes(), -1, pread); assertEquals(blockStr, b.toString()); @@ -330,7 +367,14 @@ public class TestHFileBlock { */ @Test public void testDataBlockEncoding() throws IOException { + testInternals(); + } + + private void testInternals() throws IOException { final int numBlocks = 5; + if(includesTag) { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { for (boolean pread : new boolean[] { false, true }) { for (DataBlockEncoding encoding : DataBlockEncoding.values()) { @@ -339,27 +383,35 @@ public class TestHFileBlock { FSDataOutputStream os = fs.create(path); HFileDataBlockEncoder dataBlockEncoder = new HFileDataBlockEncoderImpl(encoding); - HFileBlock.Writer hbw = new HFileBlock.Writer(algo, dataBlockEncoder, - includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE, - 
HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); + meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE); + HFileBlock.Writer hbw = new HFileBlock.Writer(dataBlockEncoder, + meta); long totalSize = 0; final List encodedSizes = new ArrayList(); final List encodedBlocks = new ArrayList(); for (int blockId = 0; blockId < numBlocks; ++blockId) { DataOutputStream dos = hbw.startWriting(BlockType.DATA); writeEncodedBlock(algo, encoding, dos, encodedSizes, encodedBlocks, - blockId, includesMemstoreTS, HConstants.HFILEBLOCK_DUMMY_HEADER); + blockId, includesMemstoreTS, HConstants.HFILEBLOCK_DUMMY_HEADER, includesTag); hbw.writeHeaderAndData(os); totalSize += hbw.getOnDiskSizeWithHeader(); } os.close(); FSDataInputStream is = fs.open(path); - HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(is, algo, - totalSize); + meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(algo); + HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(is, totalSize, meta); hbr.setDataBlockEncoder(dataBlockEncoder); hbr.setIncludesMemstoreTS(includesMemstoreTS); - HFileBlock b; int pos = 0; for (int blockId = 0; blockId < numBlocks; ++blockId) { @@ -393,28 +445,31 @@ public class TestHFileBlock { static void writeEncodedBlock(Algorithm algo, DataBlockEncoding encoding, DataOutputStream dos, final List encodedSizes, - final List encodedBlocks, int blockId, - boolean includesMemstoreTS, byte[] dummyHeader) throws IOException { + final List encodedBlocks, int blockId, + boolean includesMemstoreTS, byte[] dummyHeader, boolean useTag) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DoubleOutputStream doubleOutputStream = new DoubleOutputStream(dos, baos); - writeTestKeyValues(doubleOutputStream, blockId, includesMemstoreTS); + writeTestKeyValues(doubleOutputStream, blockId, includesMemstoreTS, useTag); ByteBuffer rawBuf = ByteBuffer.wrap(baos.toByteArray()); rawBuf.rewind(); DataBlockEncoder encoder = encoding.getEncoder(); int headerLen = dummyHeader.length; byte[] encodedResultWithHeader = null; + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(algo); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(useTag); if (encoder != null) { - HFileBlockEncodingContext encodingCtx = - encoder.newDataBlockEncodingContext(algo, encoding, dummyHeader); - encoder.encodeKeyValues(rawBuf, includesMemstoreTS, - encodingCtx); + HFileBlockEncodingContext encodingCtx = encoder.newDataBlockEncodingContext(encoding, + dummyHeader, meta); + encoder.encodeKeyValues(rawBuf, encodingCtx); encodedResultWithHeader = encodingCtx.getUncompressedBytesWithHeader(); } else { - HFileBlockDefaultEncodingContext defaultEncodingCtx = - new HFileBlockDefaultEncodingContext(algo, encoding, dummyHeader); + HFileBlockDefaultEncodingContext defaultEncodingCtx = new HFileBlockDefaultEncodingContext( + encoding, dummyHeader, meta); byte[] rawBufWithHeader = new byte[rawBuf.array().length + headerLen]; System.arraycopy(rawBuf.array(), 0, rawBufWithHeader, @@ -474,6 +529,10 @@ public class TestHFileBlock { @Test public void testPreviousOffset() throws IOException { + testPreviousOffsetInternals(); + } + + protected void testPreviousOffsetInternals() throws IOException { for (Compression.Algorithm 
algo : COMPRESSION_ALGORITHMS) { for (boolean pread : BOOLEAN_VALUES) { for (boolean cacheOnWrite : BOOLEAN_VALUES) { @@ -491,8 +550,12 @@ public class TestHFileBlock { expectedPrevOffsets, expectedTypes, expectedContents); FSDataInputStream is = fs.open(path); - HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, algo, - totalSize); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(algo); + HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, totalSize, meta); long curOffset = 0; for (int i = 0; i < NUM_TEST_BLOCKS; ++i) { if (!pread) { @@ -656,6 +719,11 @@ public class TestHFileBlock { @Test public void testConcurrentReading() throws Exception { + testConcurrentReadingInternals(); + } + + protected void testConcurrentReadingInternals() throws IOException, + InterruptedException, ExecutionException { for (Compression.Algorithm compressAlgo : COMPRESSION_ALGORITHMS) { Path path = new Path(TEST_UTIL.getDataTestDir(), "concurrent_reading"); @@ -665,8 +733,12 @@ public class TestHFileBlock { writeBlocks(rand, compressAlgo, path, offsets, null, types, null); FSDataInputStream is = fs.open(path); long fileSize = fs.getFileStatus(path).getLen(); - HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, compressAlgo, - fileSize); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(compressAlgo); + HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(is, fileSize, meta); Executor exec = Executors.newFixedThreadPool(NUM_READER_THREADS); ExecutorCompletionService ecs = @@ -697,9 +769,14 @@ public class TestHFileBlock { ) throws IOException { boolean cacheOnWrite = expectedContents != null; FSDataOutputStream os = fs.create(path); - HFileBlock.Writer hbw = new HFileBlock.Writer(compressAlgo, null, - includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE, - HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(compressAlgo); + meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); + meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE); + HFileBlock.Writer hbw = new HFileBlock.Writer(null, meta); Map prevOffsetByType = new HashMap(); long totalSize = 0; for (int i = 0; i < NUM_TEST_BLOCKS; ++i) { @@ -749,6 +826,10 @@ public class TestHFileBlock { @Test public void testBlockHeapSize() { + testBlockHeapSizeInternals(); + } + + protected void testBlockHeapSizeInternals() { if (ClassSize.is32BitJVM()) { assertTrue(HFileBlock.BYTE_BUFFER_HEAP_SIZE == 64); } else { @@ -758,16 +839,24 @@ public class TestHFileBlock { for (int size : new int[] { 100, 256, 12345 }) { byte[] byteArr = new byte[HConstants.HFILEBLOCK_HEADER_SIZE + size]; ByteBuffer buf = ByteBuffer.wrap(byteArr, 0, size); + HFileContext meta = new HFileContext(); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setUsesHBaseChecksum(false); + meta.setCompressAlgo(Algorithm.NONE); + meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); + meta.setChecksumType(ChecksumType.NULL); + meta.setBytesPerChecksum(0); HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, - HFileBlock.FILL_HEADER, -1, includesMemstoreTS, - HFileBlock.MINOR_VERSION_NO_CHECKSUM, 0, 
ChecksumType.NULL.getCode(), - 0); + HFileBlock.FILL_HEADER, -1, + 0, meta); long byteBufferExpectedSize = ClassSize.align(ClassSize.estimateBase(buf.getClass(), true) + HConstants.HFILEBLOCK_HEADER_SIZE + size); + long hfileMetaSize = ClassSize.align(ClassSize.estimateBase(HFileContext.class, true)); long hfileBlockExpectedSize = ClassSize.align(ClassSize.estimateBase(HFileBlock.class, true)); - long expected = hfileBlockExpectedSize + byteBufferExpectedSize; + long expected = hfileBlockExpectedSize + byteBufferExpectedSize + hfileMetaSize; assertEquals("Block data size: " + size + ", byte buffer expected " + "size: " + byteBufferExpectedSize + ", HFileBlock class expected " + "size: " + hfileBlockExpectedSize + ";", expected, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java index 7193c88249c..7b2cac5ab35 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockCompatibility.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.util.Bytes; @@ -84,14 +85,16 @@ public class TestHFileBlockCompatibility { private int uncompressedSizeV1; private final boolean includesMemstoreTS; + private final boolean includesTag; - public TestHFileBlockCompatibility(boolean includesMemstoreTS) { + public TestHFileBlockCompatibility(boolean includesMemstoreTS, boolean includesTag) { this.includesMemstoreTS = includesMemstoreTS; + this.includesTag = includesTag; } @Parameters public static Collection parameters() { - return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + return HBaseTestingUtility.MEMSTORETS_TAGS_PARAMETRIZED; } @Before @@ -117,7 +120,7 @@ public class TestHFileBlockCompatibility { throws IOException { final BlockType blockType = BlockType.DATA; Writer hbw = new Writer(algo, null, - includesMemstoreTS); + includesMemstoreTS, includesTag); DataOutputStream dos = hbw.startWriting(blockType); TestHFileBlock.writeTestBlockContents(dos); // make sure the block is ready by calling hbw.getHeaderAndData() @@ -144,7 +147,7 @@ public class TestHFileBlockCompatibility { @Test public void testNoCompression() throws IOException { assertEquals(4000, createTestV2Block(NONE).getBlockForCaching(). 
- getUncompressedSizeWithoutHeader()); + getUncompressedSizeWithoutHeader()); } @Test @@ -172,6 +175,9 @@ public class TestHFileBlockCompatibility { @Test public void testReaderV2() throws IOException { + if(includesTag) { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { for (boolean pread : new boolean[] { false, true }) { LOG.info("testReaderV2: Compression algorithm: " + algo + @@ -180,7 +186,7 @@ public class TestHFileBlockCompatibility { + algo); FSDataOutputStream os = fs.create(path); Writer hbw = new Writer(algo, null, - includesMemstoreTS); + includesMemstoreTS, includesTag); long totalSize = 0; for (int blockId = 0; blockId < 2; ++blockId) { DataOutputStream dos = hbw.startWriting(BlockType.DATA); @@ -192,8 +198,13 @@ public class TestHFileBlockCompatibility { os.close(); FSDataInputStream is = fs.open(path); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(algo); HFileBlock.FSReader hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is), - algo, totalSize, MINOR_VERSION, fs, path); + totalSize, fs, path, meta); HFileBlock b = hbr.readBlockData(0, -1, -1, pread); is.close(); @@ -205,8 +216,8 @@ public class TestHFileBlockCompatibility { if (algo == GZ) { is = fs.open(path); - hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is), - algo, totalSize, MINOR_VERSION, fs, path); + hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is), totalSize, fs, path, + meta); b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM + b.totalChecksumBytes(), -1, pread); assertEquals(blockStr, b.toString()); @@ -235,6 +246,9 @@ public class TestHFileBlockCompatibility { */ @Test public void testDataBlockEncoding() throws IOException { + if(includesTag) { + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } final int numBlocks = 5; for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) { for (boolean pread : new boolean[] { false, true }) { @@ -250,7 +264,7 @@ public class TestHFileBlockCompatibility { TestHFileBlockCompatibility.Writer.DUMMY_HEADER); TestHFileBlockCompatibility.Writer hbw = new TestHFileBlockCompatibility.Writer(algo, - dataBlockEncoder, includesMemstoreTS); + dataBlockEncoder, includesMemstoreTS, includesTag); long totalSize = 0; final List encodedSizes = new ArrayList(); final List encodedBlocks = new ArrayList(); @@ -258,7 +272,7 @@ public class TestHFileBlockCompatibility { DataOutputStream dos = hbw.startWriting(BlockType.DATA); TestHFileBlock.writeEncodedBlock(algo, encoding, dos, encodedSizes, encodedBlocks, blockId, includesMemstoreTS, - TestHFileBlockCompatibility.Writer.DUMMY_HEADER); + TestHFileBlockCompatibility.Writer.DUMMY_HEADER, includesTag); hbw.writeHeaderAndData(os); totalSize += hbw.getOnDiskSizeWithHeader(); @@ -266,8 +280,13 @@ public class TestHFileBlockCompatibility { os.close(); FSDataInputStream is = fs.open(path); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(algo); HFileBlock.FSReaderV2 hbr = new HFileBlock.FSReaderV2(new FSDataInputStreamWrapper(is), - algo, totalSize, MINOR_VERSION, fs, path); + totalSize, fs, path, meta); hbr.setDataBlockEncoder(dataBlockEncoder); hbr.setIncludesMemstoreTS(includesMemstoreTS); @@ -301,9 +320,6 @@ public 
class TestHFileBlockCompatibility { } } } - - - /** * This is the version of the HFileBlock.Writer that is used to * create V2 blocks with minor version 0. These blocks do not @@ -392,33 +408,34 @@ public class TestHFileBlockCompatibility { /** The offset of the previous block of the same type */ private long prevOffset; - /** Whether we are including memstore timestamp after every key/value */ - private boolean includesMemstoreTS; + private HFileContext meta; /** * @param compressionAlgorithm compression algorithm to use * @param dataBlockEncoderAlgo data block encoding algorithm to use */ public Writer(Compression.Algorithm compressionAlgorithm, - HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS) { + HFileDataBlockEncoder dataBlockEncoder, boolean includesMemstoreTS, boolean includesTag) { compressAlgo = compressionAlgorithm == null ? NONE : compressionAlgorithm; this.dataBlockEncoder = dataBlockEncoder != null ? dataBlockEncoder : NoOpDataBlockEncoder.INSTANCE; - defaultBlockEncodingCtx = - new HFileBlockDefaultEncodingContext(compressionAlgorithm, - null, DUMMY_HEADER); + meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(includesTag); + meta.setCompressAlgo(compressionAlgorithm); + + defaultBlockEncodingCtx = new HFileBlockDefaultEncodingContext(null, DUMMY_HEADER, meta); dataBlockEncodingCtx = - this.dataBlockEncoder.newOnDiskDataBlockEncodingContext( - compressionAlgorithm, DUMMY_HEADER); - + this.dataBlockEncoder.newOnDiskDataBlockEncodingContext( + DUMMY_HEADER, meta); baosInMemory = new ByteArrayOutputStream(); prevOffsetByType = new long[BlockType.values().length]; for (int i = 0; i < prevOffsetByType.length; ++i) prevOffsetByType[i] = -1; - this.includesMemstoreTS = includesMemstoreTS; } /** @@ -521,8 +538,7 @@ public class TestHFileBlockCompatibility { uncompressedBytesWithHeader.length - HEADER_SIZE).slice(); //do the encoding - dataBlockEncoder.beforeWriteToDisk(rawKeyValues, - includesMemstoreTS, dataBlockEncodingCtx, blockType); + dataBlockEncoder.beforeWriteToDisk(rawKeyValues, dataBlockEncodingCtx, blockType); uncompressedBytesWithHeader = dataBlockEncodingCtx.getUncompressedBytesWithHeader(); @@ -714,11 +730,13 @@ public class TestHFileBlockCompatibility { * Creates a new HFileBlock. 
*/ public HFileBlock getBlockForCaching() { + meta.setUsesHBaseChecksum(false); + meta.setChecksumType(ChecksumType.NULL); + meta.setBytesPerChecksum(0); return new HFileBlock(blockType, getOnDiskSizeWithoutHeader(), getUncompressedSizeWithoutHeader(), prevOffset, - getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset, - includesMemstoreTS, MINOR_VERSION, 0, ChecksumType.NULL.getCode(), - getOnDiskSizeWithoutHeader()); + getUncompressedBufferWithHeader(), DONT_FILL_HEADER, startOffset, + getOnDiskSizeWithoutHeader(), meta); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java index 9fc9e9c4d03..7101291bba7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java @@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.fs.HFileSystem; @@ -118,9 +119,27 @@ public class TestHFileBlockIndex { @Test public void testBlockIndex() throws IOException { - path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr); - writeWholeIndex(); - readIndex(); + testBlockIndexInternals(false); + clear(); + testBlockIndexInternals(true); + } + + private void clear() throws IOException { + keys.clear(); + rand = new Random(2389757); + firstKeyInFile = null; + conf = TEST_UTIL.getConfiguration(); + + // This test requires at least HFile format version 2. 
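// (Reviewer sketch, not part of the patch.) Tags need HFile format version 3,
// while the no-tag pass of this test only needs version 2. The patch pins the
// version to 3 for both passes; if we wanted it to follow the flag instead, a
// hypothetical helper could look like this:
//
//   private void setFormatVersionFor(boolean useTags) {
//     conf.setInt(HFile.FORMAT_VERSION_KEY, useTags ? 3 : 2);
//   }
//
// Pinning 3 should also be fine, since the v3 writer and reader are expected
// to handle tag-less cells as well.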
+ conf.setInt(HFile.FORMAT_VERSION_KEY, 3); + + fs = HFileSystem.get(conf); + } + + protected void testBlockIndexInternals(boolean useTags) throws IOException { + path = new Path(TEST_UTIL.getDataTestDir(), "block_index_" + compr + useTags); + writeWholeIndex(useTags); + readIndex(useTags); } /** @@ -164,13 +183,18 @@ public class TestHFileBlockIndex { } } - public void readIndex() throws IOException { + public void readIndex(boolean useTags) throws IOException { long fileSize = fs.getFileStatus(path).getLen(); LOG.info("Size of " + path + ": " + fileSize); FSDataInputStream istream = fs.open(path); - HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream, - compr, fs.getFileStatus(path).getLen()); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(useTags); + meta.setCompressAlgo(compr); + HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(istream, fs.getFileStatus(path) + .getLen(), meta); BlockReaderWrapper brw = new BlockReaderWrapper(blockReader); HFileBlockIndex.BlockIndexReader indexReader = @@ -215,11 +239,17 @@ public class TestHFileBlockIndex { istream.close(); } - private void writeWholeIndex() throws IOException { + private void writeWholeIndex(boolean useTags) throws IOException { assertEquals(0, keys.size()); - HFileBlock.Writer hbw = new HFileBlock.Writer(compr, null, - includesMemstoreTS, HFile.DEFAULT_CHECKSUM_TYPE, - HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(useTags); + meta.setCompressAlgo(compr); + meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE); + meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); + HFileBlock.Writer hbw = new HFileBlock.Writer(null, + meta); FSDataOutputStream outputStream = fs.create(path); HFileBlockIndex.BlockIndexWriter biw = new HFileBlockIndex.BlockIndexWriter(hbw, null, null); @@ -486,11 +516,13 @@ public class TestHFileBlockIndex { // Write the HFile { + HFileContext meta = new HFileContext(); + meta.setBlocksize(SMALL_BLOCK_SIZE); + meta.setCompressAlgo(compr); HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf) .withPath(fs, hfilePath) - .withBlockSize(SMALL_BLOCK_SIZE) - .withCompression(compr) + .withFileContext(meta) .create(); Random rand = new Random(19231737); @@ -502,7 +534,7 @@ public class TestHFileBlockIndex { row, 0, 0).getKey(); byte[] v = TestHFileWriterV2.randomValue(rand); - writer.append(k, v); + writer.append(k, v, HConstants.EMPTY_BYTE_ARRAY); keys[i] = k; values[i] = v; keyStrSet.add(Bytes.toStringBinary(k)); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java index 6a5a742c5b2..4e48ed9df89 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileDataBlockEncoder.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import 
org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; @@ -80,7 +81,12 @@ public class TestHFileDataBlockEncoder { */ @Test public void testEncodingWithCache() { - HFileBlock block = getSampleHFileBlock(); + testEncodingWithCacheInternals(false); + testEncodingWithCacheInternals(true); + } + + private void testEncodingWithCacheInternals(boolean useTag) { + HFileBlock block = getSampleHFileBlock(useTag); LruBlockCache blockCache = new LruBlockCache(8 * 1024 * 1024, 32 * 1024); HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(block, false); @@ -107,36 +113,47 @@ public class TestHFileDataBlockEncoder { /** Test for HBASE-5746. */ @Test public void testHeaderSizeInCacheWithoutChecksum() throws Exception { + testHeaderSizeInCacheWithoutChecksumInternals(false); + testHeaderSizeInCacheWithoutChecksumInternals(true); + } + + private void testHeaderSizeInCacheWithoutChecksumInternals(boolean useTags) throws IOException { int headerSize = HConstants.HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM; // Create some KVs and create the block with old-style header. ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer( - generator.generateTestKeyValues(60), includesMemstoreTS); + generator.generateTestKeyValues(60, useTags), includesMemstoreTS); int size = keyValues.limit(); ByteBuffer buf = ByteBuffer.allocate(size + headerSize); buf.position(headerSize); keyValues.rewind(); buf.put(keyValues); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(false); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(useTags); + meta.setCompressAlgo(Compression.Algorithm.NONE); + meta.setBlocksize(0); + meta.setChecksumType(ChecksumType.NULL); HFileBlock block = new HFileBlock(BlockType.DATA, size, size, -1, buf, - HFileBlock.FILL_HEADER, 0, includesMemstoreTS, - HFileBlock.MINOR_VERSION_NO_CHECKSUM, 0, ChecksumType.NULL.getCode(), 0); - HFileBlock cacheBlock = blockEncoder.diskToCacheFormat(createBlockOnDisk(block), false); + HFileBlock.FILL_HEADER, 0, + 0, meta); + HFileBlock cacheBlock = blockEncoder + .diskToCacheFormat(createBlockOnDisk(block, useTags), false); assertEquals(headerSize, cacheBlock.getDummyHeaderForVersion().length); } - private HFileBlock createBlockOnDisk(HFileBlock block) throws IOException { + private HFileBlock createBlockOnDisk(HFileBlock block, boolean useTags) throws IOException { int size; HFileBlockEncodingContext context = new HFileBlockDefaultEncodingContext( - Compression.Algorithm.NONE, blockEncoder.getEncodingOnDisk(), - HConstants.HFILEBLOCK_DUMMY_HEADER); + blockEncoder.getEncodingOnDisk(), + HConstants.HFILEBLOCK_DUMMY_HEADER, block.getHFileContext()); context.setDummyHeader(block.getDummyHeaderForVersion()); - blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(), - includesMemstoreTS, context, block.getBlockType()); + blockEncoder.beforeWriteToDisk(block.getBufferWithoutHeader(), context, block.getBlockType()); byte[] encodedBytes = context.getUncompressedBytesWithHeader(); size = encodedBytes.length - block.getDummyHeaderForVersion().length; return new HFileBlock(context.getBlockType(), size, size, -1, - ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0, includesMemstoreTS, - block.getMinorVersion(), block.getBytesPerChecksum(), block.getChecksumType(), - block.getOnDiskDataSizeWithHeader()); + ByteBuffer.wrap(encodedBytes), HFileBlock.FILL_HEADER, 0, + block.getOnDiskDataSizeWithHeader(), block.getHFileContext()); } /** @@ -145,9 +162,14 @@ public class TestHFileDataBlockEncoder { */ @Test public void 
testEncodingWritePath() throws IOException { + testEncodingWritePathInternals(false); + testEncodingWritePathInternals(true); + } + + private void testEncodingWritePathInternals(boolean useTag) throws IOException { // usually we have just block without headers, but don't complicate that - HFileBlock block = getSampleHFileBlock(); - HFileBlock blockOnDisk = createBlockOnDisk(block); + HFileBlock block = getSampleHFileBlock(useTag); + HFileBlock blockOnDisk = createBlockOnDisk(block, useTag); if (blockEncoder.getEncodingOnDisk() != DataBlockEncoding.NONE) { @@ -164,21 +186,33 @@ public class TestHFileDataBlockEncoder { */ @Test public void testEncodingReadPath() { - HFileBlock origBlock = getSampleHFileBlock(); + testEncodingReadPathInternals(false); + testEncodingReadPathInternals(true); + } + + private void testEncodingReadPathInternals(boolean useTag) { + HFileBlock origBlock = getSampleHFileBlock(useTag); blockEncoder.diskToCacheFormat(origBlock, false); } - private HFileBlock getSampleHFileBlock() { + private HFileBlock getSampleHFileBlock(boolean useTag) { ByteBuffer keyValues = RedundantKVGenerator.convertKvToByteBuffer( - generator.generateTestKeyValues(60), includesMemstoreTS); + generator.generateTestKeyValues(60, useTag), includesMemstoreTS); int size = keyValues.limit(); ByteBuffer buf = ByteBuffer.allocate(size + HConstants.HFILEBLOCK_HEADER_SIZE); buf.position(HConstants.HFILEBLOCK_HEADER_SIZE); keyValues.rewind(); buf.put(keyValues); + HFileContext meta = new HFileContext(); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(useTag); + meta.setUsesHBaseChecksum(true); + meta.setCompressAlgo(Algorithm.NONE); + meta.setBlocksize(0); + meta.setChecksumType(ChecksumType.NULL); HFileBlock b = new HFileBlock(BlockType.DATA, size, size, -1, buf, - HFileBlock.FILL_HEADER, 0, includesMemstoreTS, - HFileReaderV2.MAX_MINOR_VERSION, 0, ChecksumType.NULL.getCode(), 0); + HFileBlock.FILL_HEADER, 0, + 0, meta); return b; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java index f375b659068..c850d3a1a43 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileInlineToRootChunkConversion.java @@ -52,9 +52,11 @@ public class TestHFileInlineToRootChunkConversion { FileSystem fs = FileSystem.get(conf); CacheConfig cacheConf = new CacheConfig(conf); conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, maxChunkSize); + HFileContext context = new HFileContext(); + context.setBlocksize(16); HFileWriterV2 hfw = (HFileWriterV2) new HFileWriterV2.WriterFactoryV2(conf, cacheConf) - .withBlockSize(16) + .withFileContext(context) .withPath(fs, hfPath).create(); List keys = new ArrayList(); StringBuilder sb = new StringBuilder(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java index 4276b2a101b..9e1dd19b3ef 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java @@ -161,11 +161,13 @@ public class TestHFilePerformance extends TestCase { FSDataOutputStream fout = createFSOutput(path); if ("HFile".equals(fileType)){ + HFileContext 
meta = new HFileContext(); + meta.setCompressAlgo(AbstractHFileWriter.compressionByName(codecName)); + meta.setBlocksize(minBlockSize); System.out.println("HFile write method: "); HFile.Writer writer = HFile.getWriterFactoryNoCache(conf) .withOutputStream(fout) - .withBlockSize(minBlockSize) - .withCompression(codecName) + .withFileContext(meta) .withComparator(new KeyValue.RawBytesComparator()) .create(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java index 57971220b74..1a26bb55844 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java @@ -127,10 +127,12 @@ public class TestHFileSeek extends TestCase { long totalBytes = 0; FSDataOutputStream fout = createFSOutput(path, fs); try { + HFileContext context = new HFileContext(); + context.setBlocksize(options.minBlockSize); + context.setCompressAlgo(AbstractHFileWriter.compressionByName(options.compress)); Writer writer = HFile.getWriterFactoryNoCache(conf) .withOutputStream(fout) - .withBlockSize(options.minBlockSize) - .withCompression(options.compress) + .withFileContext(context) .withComparator(new KeyValue.RawBytesComparator()) .create(); try { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java index 5beca602471..f01b5c9dede 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hbase.KeyValue.KVComparator; import org.apache.hadoop.hbase.SmallTests; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; @@ -93,11 +94,13 @@ public class TestHFileWriterV2 { private void writeDataAndReadFromHFile(Path hfilePath, Algorithm compressAlgo, int entryCount, boolean findMidKey) throws IOException { + HFileContext context = new HFileContext(); + context.setBlocksize(4096); + context.setCompressAlgo(compressAlgo); HFileWriterV2 writer = (HFileWriterV2) new HFileWriterV2.WriterFactoryV2(conf, new CacheConfig(conf)) .withPath(fs, hfilePath) - .withBlockSize(4096) - .withCompression(compressAlgo) + .withFileContext(context) .create(); Random rand = new Random(9713312); // Just a fixed seed. @@ -134,8 +137,13 @@ public class TestHFileWriterV2 { assertEquals(2, trailer.getMajorVersion()); assertEquals(entryCount, trailer.getEntryCount()); - HFileBlock.FSReader blockReader = - new HFileBlock.FSReaderV2(fsdis, compressAlgo, fileSize); + HFileContext meta = new HFileContext(); + meta.setUsesHBaseChecksum(true); + meta.setIncludesMvcc(false); + meta.setIncludesTags(false); + meta.setCompressAlgo(compressAlgo); + + HFileBlock.FSReader blockReader = new HFileBlock.FSReaderV2(fsdis, fileSize, meta); // Comparator class name is stored in the trailer in version 2. 
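// (Reviewer sketch, not part of the patch.) The same four-setter HFileContext
// recipe now precedes every FSReaderV2 in these tests. Assuming only the
// compression algorithm and the two include flags vary on this read path, a
// hypothetical helper would trim the repetition:
//
//   static HFileContext readerContext(Compression.Algorithm algo,
//       boolean includesMvcc, boolean includesTags) {
//     HFileContext ctx = new HFileContext();
//     ctx.setUsesHBaseChecksum(true);   // post-checksum on-disk layout
//     ctx.setIncludesMvcc(includesMvcc);
//     ctx.setIncludesTags(includesTags);
//     ctx.setCompressAlgo(algo);
//     return ctx;
//   }
//
// e.g. new HFileBlock.FSReaderV2(fsdis, fileSize,
//          readerContext(compressAlgo, false, false));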
KVComparator comparator = trailer.createComparator(); HFileBlockIndex.BlockIndexReader dataBlockIndexReader = diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java new file mode 100644 index 00000000000..183ff443324 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java @@ -0,0 +1,298 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.io.hfile; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Random; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.KVComparator; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; +import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Writables; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Testing writing a version 3 {@link HFile}. This is a low-level test written + * during the development of {@link HFileWriterV3}. 
+ */ +@RunWith(Parameterized.class) +@Category(SmallTests.class) +public class TestHFileWriterV3 { + + private static final Log LOG = LogFactory.getLog(TestHFileWriterV3.class); + + private static final HBaseTestingUtility TEST_UTIL = + new HBaseTestingUtility(); + + private Configuration conf; + private FileSystem fs; + private boolean useTags; + public TestHFileWriterV3(boolean useTags) { + this.useTags = useTags; + } + @Parameters + public static Collection parameters() { + return HBaseTestingUtility.BOOLEAN_PARAMETERIZED; + } + + @Before + public void setUp() throws IOException { + conf = TEST_UTIL.getConfiguration(); + fs = FileSystem.get(conf); + } + + @Test + public void testHFileFormatV3() throws IOException { + testHFileFormatV3Internals(useTags); + } + + private void testHFileFormatV3Internals(boolean useTags) throws IOException { + Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), "testHFileFormatV3"); + final Compression.Algorithm compressAlgo = Compression.Algorithm.GZ; + final int entryCount = 10000; + writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, false, useTags); + } + + @Test + public void testMidKeyInHFile() throws IOException{ + testMidKeyInHFileInternals(useTags); + } + + private void testMidKeyInHFileInternals(boolean useTags) throws IOException { + Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), + "testMidKeyInHFile"); + Compression.Algorithm compressAlgo = Compression.Algorithm.NONE; + int entryCount = 50000; + writeDataAndReadFromHFile(hfilePath, compressAlgo, entryCount, true, useTags); + } + + private void writeDataAndReadFromHFile(Path hfilePath, + Algorithm compressAlgo, int entryCount, boolean findMidKey, boolean useTags) throws IOException { + HFileContext context = new HFileContext(); + context.setBlocksize(4096); + context.setCompressAlgo(compressAlgo); + HFileWriterV3 writer = (HFileWriterV3) + new HFileWriterV3.WriterFactoryV3(conf, new CacheConfig(conf)) + .withPath(fs, hfilePath) + .withFileContext(context) + .withComparator(KeyValue.COMPARATOR) + .create(); + + Random rand = new Random(9713312); // Just a fixed seed. + List keyValues = new ArrayList(entryCount); + + for (int i = 0; i < entryCount; ++i) { + byte[] keyBytes = TestHFileWriterV2.randomOrderedKey(rand, i); + + // A random-length random value. + byte[] valueBytes = TestHFileWriterV2.randomValue(rand); + KeyValue keyValue = null; + if (useTags) { + ArrayList tags = new ArrayList(); + for (int j = 0; j < 1 + rand.nextInt(4); j++) { + byte[] tagBytes = new byte[16]; + rand.nextBytes(tagBytes); + tags.add(new Tag((byte) 1, tagBytes)); + } + keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, + valueBytes, tags); + } else { + keyValue = new KeyValue(keyBytes, null, null, HConstants.LATEST_TIMESTAMP, + valueBytes); + } + writer.append(keyValue); + keyValues.add(keyValue); + } + + // Add in an arbitrary order. They will be sorted lexicographically by + // the key. 
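// (Reviewer note, not part of the patch.) "Sorted by the key" here means the
// meta block names: CAPITAL_OF_FRANCE < CAPITAL_OF_RUSSIA < CAPITAL_OF_USA,
// which is why the read-back loop at the end of this test expects Paris,
// then Moscow, then Washington, D.C., regardless of the append order below.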
+ writer.appendMetaBlock("CAPITAL_OF_USA", new Text("Washington, D.C.")); + writer.appendMetaBlock("CAPITAL_OF_RUSSIA", new Text("Moscow")); + writer.appendMetaBlock("CAPITAL_OF_FRANCE", new Text("Paris")); + + writer.close(); + + + FSDataInputStream fsdis = fs.open(hfilePath); + + long fileSize = fs.getFileStatus(hfilePath).getLen(); + FixedFileTrailer trailer = + FixedFileTrailer.readFromStream(fsdis, fileSize); + + assertEquals(3, trailer.getMajorVersion()); + assertEquals(entryCount, trailer.getEntryCount()); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(compressAlgo); + meta.setIncludesMvcc(false); + meta.setIncludesTags(useTags); + meta.setUsesHBaseChecksum(true); + HFileBlock.FSReader blockReader = + new HFileBlock.FSReaderV2(fsdis, fileSize, meta); + // Comparator class name is stored in the trailer in version 2. + KVComparator comparator = trailer.createComparator(); + HFileBlockIndex.BlockIndexReader dataBlockIndexReader = + new HFileBlockIndex.BlockIndexReader(comparator, + trailer.getNumDataIndexLevels()); + HFileBlockIndex.BlockIndexReader metaBlockIndexReader = + new HFileBlockIndex.BlockIndexReader( + KeyValue.RAW_COMPARATOR, 1); + + HFileBlock.BlockIterator blockIter = blockReader.blockRange( + trailer.getLoadOnOpenDataOffset(), + fileSize - trailer.getTrailerSize()); + // Data index. We also read statistics about the block index written after + // the root level. + dataBlockIndexReader.readMultiLevelIndexRoot( + blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), + trailer.getDataIndexCount()); + + if (findMidKey) { + byte[] midkey = dataBlockIndexReader.midkey(); + assertNotNull("Midkey should not be null", midkey); + } + + // Meta index. + metaBlockIndexReader.readRootIndex( + blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX).getByteStream(), + trailer.getMetaIndexCount()); + // File info + FileInfo fileInfo = new FileInfo(); + fileInfo.read(blockIter.nextBlockWithBlockType(BlockType.FILE_INFO).getByteStream()); + byte [] keyValueFormatVersion = fileInfo.get( + HFileWriterV3.KEY_VALUE_VERSION); + boolean includeMemstoreTS = keyValueFormatVersion != null && + Bytes.toInt(keyValueFormatVersion) > 0; + + // Counters for the number of key/value pairs and the number of blocks + int entriesRead = 0; + int blocksRead = 0; + long memstoreTS = 0; + + // Scan blocks the way the reader would scan them + fsdis.seek(0); + long curBlockPos = 0; + while (curBlockPos <= trailer.getLastDataBlockOffset()) { + HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false); + assertEquals(BlockType.DATA, block.getBlockType()); + ByteBuffer buf = block.getBufferWithoutHeader(); + int keyLen = -1; + while (buf.hasRemaining()) { + + keyLen = buf.getInt(); + + int valueLen = buf.getInt(); + + byte[] key = new byte[keyLen]; + buf.get(key); + + byte[] value = new byte[valueLen]; + buf.get(value); + byte[] tagValue = null; + int tagLen = buf.getShort(); + tagValue = new byte[tagLen]; + buf.get(tagValue); + + if (includeMemstoreTS) { + ByteArrayInputStream byte_input = new ByteArrayInputStream(buf.array(), buf.arrayOffset() + + buf.position(), buf.remaining()); + DataInputStream data_input = new DataInputStream(byte_input); + + memstoreTS = WritableUtils.readVLong(data_input); + buf.position(buf.position() + WritableUtils.getVIntSize(memstoreTS)); + } + + // A brute-force check to see that all keys and values are correct. 
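// (Reviewer note, summarizing the decoding this loop performs rather than
// adding behaviour.) Each cell is pulled out of the v3 data block as:
//   int keyLength, int valueLength, key bytes, value bytes,
//   short tagsLength plus that many tag bytes (read for both
//   parameterizations of this test), and finally a vlong memstoreTS when
//   KEY_VALUE_VERSION in the file info says MVCC values were written.
// The assertions below replay that layout against the KeyValues appended
// earlier, including a byte-for-byte check of the tag region when useTags
// is on.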
+ assertTrue(Bytes.compareTo(key, keyValues.get(entriesRead).getKey()) == 0); + assertTrue(Bytes.compareTo(value, keyValues.get(entriesRead).getValue()) == 0); + if (useTags) { + assertNotNull(tagValue); + KeyValue tkv = keyValues.get(entriesRead); + assertEquals(tagValue.length, tkv.getTagsLength()); + assertTrue(Bytes.compareTo(tagValue, 0, tagValue.length, tkv.getBuffer(), + tkv.getTagsOffset(), tkv.getTagsLength()) == 0); + } + ++entriesRead; + } + ++blocksRead; + curBlockPos += block.getOnDiskSizeWithHeader(); + } + LOG.info("Finished reading: entries=" + entriesRead + ", blocksRead=" + + blocksRead); + assertEquals(entryCount, entriesRead); + + // Meta blocks. We can scan until the load-on-open data offset (which is + // the root block index offset in version 2) because we are not testing + // intermediate-level index blocks here. + + int metaCounter = 0; + while (fsdis.getPos() < trailer.getLoadOnOpenDataOffset()) { + LOG.info("Current offset: " + fsdis.getPos() + ", scanning until " + + trailer.getLoadOnOpenDataOffset()); + HFileBlock block = blockReader.readBlockData(curBlockPos, -1, -1, false); + assertEquals(BlockType.META, block.getBlockType()); + Text t = new Text(); + ByteBuffer buf = block.getBufferWithoutHeader(); + if (Writables.getWritable(buf.array(), buf.arrayOffset(), buf.limit(), t) == null) { + throw new IOException("Failed to deserialize block " + this + " into a " + t.getClass().getSimpleName()); + } + Text expectedText = + (metaCounter == 0 ? new Text("Paris") : metaCounter == 1 ? new Text( + "Moscow") : new Text("Washington, D.C.")); + assertEquals(expectedText, t); + LOG.info("Read meta block data: " + t); + ++metaCounter; + curBlockPos += block.getOnDiskSizeWithHeader(); + } + + fsdis.close(); + } + +} + diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java index 4505c7bfd57..d905f8ba2d3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java @@ -20,14 +20,17 @@ package org.apache.hadoop.hbase.io.hfile; import static org.junit.Assert.assertEquals; +import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -42,14 +45,24 @@ public class TestReseekTo { @Test public void testReseekTo() throws Exception { + testReseekToInternals(TagUsage.NO_TAG); + testReseekToInternals(TagUsage.ONLY_TAG); + testReseekToInternals(TagUsage.PARTIAL_TAG); + } + private void testReseekToInternals(TagUsage tagUsage) throws IOException { Path ncTFile = new Path(TEST_UTIL.getDataTestDir(), "basic.hfile"); FSDataOutputStream fout = TEST_UTIL.getTestFileSystem().create(ncTFile); + if(tagUsage != TagUsage.NO_TAG){ + TEST_UTIL.getConfiguration().setInt("hfile.format.version", 3); + } CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration()); + HFileContext context = new HFileContext(); + context.setBlocksize(4000); HFile.Writer writer = HFile.getWriterFactory( TEST_UTIL.getConfiguration(), cacheConf) 
.withOutputStream(fout) - .withBlockSize(4000) + .withFileContext(context) // NOTE: This test is dependent on this deprecated nonstandard comparator .withComparator(new KeyValue.RawBytesComparator()) .create(); @@ -64,7 +77,19 @@ public class TestReseekTo { String value = valueString + key; keyList.add(key); valueList.add(value); - writer.append(Bytes.toBytes(key), Bytes.toBytes(value)); + if(tagUsage == TagUsage.NO_TAG){ + writer.append(Bytes.toBytes(key), Bytes.toBytes(value)); + } else if (tagUsage == TagUsage.ONLY_TAG) { + Tag t = new Tag((byte) 1, "myTag1"); + writer.append(Bytes.toBytes(key), Bytes.toBytes(value), t.getBuffer()); + } else { + if (key % 4 == 0) { + Tag t = new Tag((byte) 1, "myTag1"); + writer.append(Bytes.toBytes(key), Bytes.toBytes(value), t.getBuffer()); + } else { + writer.append(Bytes.toBytes(key), Bytes.toBytes(value), HConstants.EMPTY_BYTE_ARRAY); + } + } } writer.close(); fout.close(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java index 4ea9d59ea4f..973124f78f6 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java @@ -33,210 +33,262 @@ import org.junit.experimental.categories.Category; @Category(SmallTests.class) public class TestSeekTo extends HBaseTestCase { - static KeyValue toKV(String row) { - return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes - .toBytes("qualifier"), Bytes.toBytes("value")); - } + static boolean switchKVs = false; + static KeyValue toKV(String row, TagUsage tagUsage) { + if (tagUsage == TagUsage.NO_TAG) { + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), + Bytes.toBytes("value")); + } else if (tagUsage == TagUsage.ONLY_TAG) { + Tag t = new Tag((byte) 1, "myTag1"); + Tag[] tags = new Tag[1]; + tags[0] = t; + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), Bytes.toBytes("qualifier"), + HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); + } else { + if (!switchKVs) { + switchKVs = true; + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), + Bytes.toBytes("qualifier"), Bytes.toBytes("value")); + } else { + switchKVs = false; + Tag t = new Tag((byte) 1, "myTag1"); + Tag[] tags = new Tag[1]; + tags[0] = t; + return new KeyValue(Bytes.toBytes(row), Bytes.toBytes("family"), + Bytes.toBytes("qualifier"), HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value"), tags); + } + } + } static String toRowStr(KeyValue kv) { return Bytes.toString(kv.getRow()); } - Path makeNewFile() throws IOException { + Path makeNewFile(TagUsage tagUsage) throws IOException { Path ncTFile = new Path(this.testDir, "basic.hfile"); + if (tagUsage != TagUsage.NO_TAG) { + conf.setInt("hfile.format.version", 3); + } else { + conf.setInt("hfile.format.version", 2); + } FSDataOutputStream fout = this.fs.create(ncTFile); - int blocksize = toKV("a").getLength() * 3; - HFile.Writer writer = HFile.getWriterFactoryNoCache(conf) - .withOutputStream(fout) - .withBlockSize(blocksize) - // NOTE: This test is dependent on this deprecated nonstandard comparator - .withComparator(KeyValue.RAW_COMPARATOR) - .create(); + int blocksize = toKV("a", tagUsage).getLength() * 3; + HFileContext context = new HFileContext(); + context.setBlocksize(blocksize); + HFile.Writer writer = HFile.getWriterFactoryNoCache(conf).withOutputStream(fout) + 
.withFileContext(context) + // NOTE: This test is dependent on this deprecated nonstandard + // comparator + .withComparator(KeyValue.RAW_COMPARATOR).create(); // 4 bytes * 3 * 2 for each key/value + // 3 for keys, 15 for values = 42 (woot) - writer.append(toKV("c")); - writer.append(toKV("e")); - writer.append(toKV("g")); + writer.append(toKV("c", tagUsage)); + writer.append(toKV("e", tagUsage)); + writer.append(toKV("g", tagUsage)); // block transition - writer.append(toKV("i")); - writer.append(toKV("k")); + writer.append(toKV("i", tagUsage)); + writer.append(toKV("k", tagUsage)); writer.close(); fout.close(); return ncTFile; } public void testSeekBefore() throws Exception { - Path p = makeNewFile(); + testSeekBeforeInternals(TagUsage.NO_TAG); + testSeekBeforeInternals(TagUsage.ONLY_TAG); + testSeekBeforeInternals(TagUsage.PARTIAL_TAG); + } + + protected void testSeekBeforeInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf)); reader.loadFileInfo(); HFileScanner scanner = reader.getScanner(false, true); - assertEquals(false, scanner.seekBefore(toKV("a").getKey())); + assertEquals(false, scanner.seekBefore(toKV("a", tagUsage).getKey())); - assertEquals(false, scanner.seekBefore(toKV("c").getKey())); + assertEquals(false, scanner.seekBefore(toKV("c", tagUsage).getKey())); - assertEquals(true, scanner.seekBefore(toKV("d").getKey())); + assertEquals(true, scanner.seekBefore(toKV("d", tagUsage).getKey())); assertEquals("c", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("e").getKey())); + assertEquals(true, scanner.seekBefore(toKV("e", tagUsage).getKey())); assertEquals("c", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("f").getKey())); + assertEquals(true, scanner.seekBefore(toKV("f", tagUsage).getKey())); assertEquals("e", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("g").getKey())); + assertEquals(true, scanner.seekBefore(toKV("g", tagUsage).getKey())); assertEquals("e", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("h").getKey())); + assertEquals(true, scanner.seekBefore(toKV("h", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("i").getKey())); + assertEquals(true, scanner.seekBefore(toKV("i", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("j").getKey())); + assertEquals(true, scanner.seekBefore(toKV("j", tagUsage).getKey())); assertEquals("i", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("k").getKey())); + assertEquals(true, scanner.seekBefore(toKV("k", tagUsage).getKey())); assertEquals("i", toRowStr(scanner.getKeyValue())); - assertEquals(true, scanner.seekBefore(toKV("l").getKey())); + assertEquals(true, scanner.seekBefore(toKV("l", tagUsage).getKey())); assertEquals("k", toRowStr(scanner.getKeyValue())); reader.close(); } public void testSeekBeforeWithReSeekTo() throws Exception { - Path p = makeNewFile(); + testSeekBeforeWithReSeekToInternals(TagUsage.NO_TAG); + testSeekBeforeWithReSeekToInternals(TagUsage.ONLY_TAG); + testSeekBeforeWithReSeekToInternals(TagUsage.PARTIAL_TAG); + } + + protected void testSeekBeforeWithReSeekToInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf)); 
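// (Reviewer note.) HFile.createReader inspects the trailer's major version
// and, assuming the usual trailer-based dispatch, hands back the matching
// reader implementation (v2 for NO_TAG files, v3 once makeNewFile bumps the
// format version for tags), so the seekBefore assertions below stay
// identical across all three TagUsage modes; only what is on disk changes.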
reader.loadFileInfo(); HFileScanner scanner = reader.getScanner(false, true); - assertEquals(false, scanner.seekBefore(toKV("a").getKey())); - assertEquals(false, scanner.seekBefore(toKV("b").getKey())); - assertEquals(false, scanner.seekBefore(toKV("c").getKey())); + assertEquals(false, scanner.seekBefore(toKV("a", tagUsage).getKey())); + assertEquals(false, scanner.seekBefore(toKV("b", tagUsage).getKey())); + assertEquals(false, scanner.seekBefore(toKV("c", tagUsage).getKey())); // seekBefore d, so the scanner points to c - assertEquals(true, scanner.seekBefore(toKV("d").getKey())); + assertEquals(true, scanner.seekBefore(toKV("d", tagUsage).getKey())); assertEquals("c", toRowStr(scanner.getKeyValue())); // reseekTo e and g - assertEquals(0, scanner.reseekTo(toKV("c").getKey())); + assertEquals(0, scanner.reseekTo(toKV("c", tagUsage).getKey())); assertEquals("c", toRowStr(scanner.getKeyValue())); - assertEquals(0, scanner.reseekTo(toKV("g").getKey())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // seekBefore e, so the scanner points to c - assertEquals(true, scanner.seekBefore(toKV("e").getKey())); + assertEquals(true, scanner.seekBefore(toKV("e", tagUsage).getKey())); assertEquals("c", toRowStr(scanner.getKeyValue())); // reseekTo e and g - assertEquals(0, scanner.reseekTo(toKV("e").getKey())); + assertEquals(0, scanner.reseekTo(toKV("e", tagUsage).getKey())); assertEquals("e", toRowStr(scanner.getKeyValue())); - assertEquals(0, scanner.reseekTo(toKV("g").getKey())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // seekBefore f, so the scanner points to e - assertEquals(true, scanner.seekBefore(toKV("f").getKey())); + assertEquals(true, scanner.seekBefore(toKV("f", tagUsage).getKey())); assertEquals("e", toRowStr(scanner.getKeyValue())); // reseekTo e and g - assertEquals(0, scanner.reseekTo(toKV("e").getKey())); + assertEquals(0, scanner.reseekTo(toKV("e", tagUsage).getKey())); assertEquals("e", toRowStr(scanner.getKeyValue())); - assertEquals(0, scanner.reseekTo(toKV("g").getKey())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // seekBefore g, so the scanner points to e - assertEquals(true, scanner.seekBefore(toKV("g").getKey())); + assertEquals(true, scanner.seekBefore(toKV("g", tagUsage).getKey())); assertEquals("e", toRowStr(scanner.getKeyValue())); // reseekTo e and g again - assertEquals(0, scanner.reseekTo(toKV("e").getKey())); + assertEquals(0, scanner.reseekTo(toKV("e", tagUsage).getKey())); assertEquals("e", toRowStr(scanner.getKeyValue())); - assertEquals(0, scanner.reseekTo(toKV("g").getKey())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // seekBefore h, so the scanner points to g - assertEquals(true, scanner.seekBefore(toKV("h").getKey())); + assertEquals(true, scanner.seekBefore(toKV("h", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // reseekTo g - assertEquals(0, scanner.reseekTo(toKV("g").getKey())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // seekBefore i, so the scanner points to g - assertEquals(true, scanner.seekBefore(toKV("i").getKey())); + assertEquals(true, scanner.seekBefore(toKV("i", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // 
reseekTo g - assertEquals(0, scanner.reseekTo(toKV("g").getKey())); + assertEquals(0, scanner.reseekTo(toKV("g", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); // seekBefore j, so the scanner points to i - assertEquals(true, scanner.seekBefore(toKV("j").getKey())); + assertEquals(true, scanner.seekBefore(toKV("j", tagUsage).getKey())); assertEquals("i", toRowStr(scanner.getKeyValue())); // reseekTo i - assertEquals(0, scanner.reseekTo(toKV("i").getKey())); + assertEquals(0, scanner.reseekTo(toKV("i", tagUsage).getKey())); assertEquals("i", toRowStr(scanner.getKeyValue())); // seekBefore k, so the scanner points to i - assertEquals(true, scanner.seekBefore(toKV("k").getKey())); + assertEquals(true, scanner.seekBefore(toKV("k", tagUsage).getKey())); assertEquals("i", toRowStr(scanner.getKeyValue())); // reseekTo i and k - assertEquals(0, scanner.reseekTo(toKV("i").getKey())); + assertEquals(0, scanner.reseekTo(toKV("i", tagUsage).getKey())); assertEquals("i", toRowStr(scanner.getKeyValue())); - assertEquals(0, scanner.reseekTo(toKV("k").getKey())); + assertEquals(0, scanner.reseekTo(toKV("k", tagUsage).getKey())); assertEquals("k", toRowStr(scanner.getKeyValue())); // seekBefore l, so the scanner points to k - assertEquals(true, scanner.seekBefore(toKV("l").getKey())); + assertEquals(true, scanner.seekBefore(toKV("l", tagUsage).getKey())); assertEquals("k", toRowStr(scanner.getKeyValue())); // reseekTo k - assertEquals(0, scanner.reseekTo(toKV("k").getKey())); + assertEquals(0, scanner.reseekTo(toKV("k", tagUsage).getKey())); assertEquals("k", toRowStr(scanner.getKeyValue())); } public void testSeekTo() throws Exception { - Path p = makeNewFile(); + testSeekToInternals(TagUsage.NO_TAG); + testSeekToInternals(TagUsage.ONLY_TAG); + testSeekToInternals(TagUsage.PARTIAL_TAG); + } + + protected void testSeekToInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf)); reader.loadFileInfo(); assertEquals(2, reader.getDataBlockIndexReader().getRootBlockCount()); HFileScanner scanner = reader.getScanner(false, true); // lies before the start of the file. - assertEquals(-1, scanner.seekTo(toKV("a").getKey())); + assertEquals(-1, scanner.seekTo(toKV("a", tagUsage).getKey())); - assertEquals(1, scanner.seekTo(toKV("d").getKey())); + assertEquals(1, scanner.seekTo(toKV("d", tagUsage).getKey())); assertEquals("c", toRowStr(scanner.getKeyValue())); // Across a block boundary now. 
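// (Reviewer note.) The expected values follow HFileScanner.seekTo's
// contract: -1 when the key sorts before the first key in the file, 0 on an
// exact match, and 1 when the scanner is left on the closest preceding key.
// Seeking "h" below therefore returns 1 and parks the scanner on "g", even
// though that crosses a block boundary; adding tags must not change any of
// these return codes.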
- assertEquals(1, scanner.seekTo(toKV("h").getKey())); + assertEquals(1, scanner.seekTo(toKV("h", tagUsage).getKey())); assertEquals("g", toRowStr(scanner.getKeyValue())); - assertEquals(1, scanner.seekTo(toKV("l").getKey())); + assertEquals(1, scanner.seekTo(toKV("l", tagUsage).getKey())); assertEquals("k", toRowStr(scanner.getKeyValue())); reader.close(); } - public void testBlockContainingKey() throws Exception { - Path p = makeNewFile(); + testBlockContainingKeyInternals(TagUsage.NO_TAG); + testBlockContainingKeyInternals(TagUsage.ONLY_TAG); + testBlockContainingKeyInternals(TagUsage.PARTIAL_TAG); + } + + protected void testBlockContainingKeyInternals(TagUsage tagUsage) throws IOException { + Path p = makeNewFile(tagUsage); HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf)); reader.loadFileInfo(); HFileBlockIndex.BlockIndexReader blockIndexReader = reader.getDataBlockIndexReader(); System.out.println(blockIndexReader.toString()); - int klen = toKV("a").getKey().length; + int klen = toKV("a", tagUsage).getKey().length; // falls before the start of the file. assertEquals(-1, blockIndexReader.rootBlockContainingKey( - toKV("a").getKey(), 0, klen)); + toKV("a", tagUsage).getKey(), 0, klen)); assertEquals(0, blockIndexReader.rootBlockContainingKey( - toKV("c").getKey(), 0, klen)); + toKV("c", tagUsage).getKey(), 0, klen)); assertEquals(0, blockIndexReader.rootBlockContainingKey( - toKV("d").getKey(), 0, klen)); + toKV("d", tagUsage).getKey(), 0, klen)); assertEquals(0, blockIndexReader.rootBlockContainingKey( - toKV("e").getKey(), 0, klen)); + toKV("e", tagUsage).getKey(), 0, klen)); assertEquals(0, blockIndexReader.rootBlockContainingKey( - toKV("g").getKey(), 0, klen)); + toKV("g", tagUsage).getKey(), 0, klen)); assertEquals(0, blockIndexReader.rootBlockContainingKey( - toKV("h").getKey(), 0, klen)); + toKV("h", tagUsage).getKey(), 0, klen)); assertEquals(1, blockIndexReader.rootBlockContainingKey( - toKV("i").getKey(), 0, klen)); + toKV("i", tagUsage).getKey(), 0, klen)); assertEquals(1, blockIndexReader.rootBlockContainingKey( - toKV("j").getKey(), 0, klen)); + toKV("j", tagUsage).getKey(), 0, klen)); assertEquals(1, blockIndexReader.rootBlockContainingKey( - toKV("k").getKey(), 0, klen)); + toKV("k", tagUsage).getKey(), 0, klen)); assertEquals(1, blockIndexReader.rootBlockContainingKey( - toKV("l").getKey(), 0, klen)); + toKV("l", tagUsage).getKey(), 0, klen)); reader.close(); - } + } + } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java index 1b46c96defc..91e653bce2b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java @@ -344,7 +344,7 @@ public class TestHFileOutputFormat { // first region start key is always empty ret[0] = HConstants.EMPTY_BYTE_ARRAY; for (int i = 1; i < numKeys; i++) { - ret[i] = PerformanceEvaluation.generateValue(random); + ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.VALUE_LENGTH); } return ret; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java index 279295534f1..f3aa15625f0 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java +++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestLoadIncrementalHFiles.java @@ -25,8 +25,10 @@ import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.regionserver.BloomType; import org.apache.hadoop.hbase.regionserver.StoreFile; @@ -58,8 +60,8 @@ public class TestLoadIncrementalHFiles { }; public static int BLOCKSIZE = 64*1024; - public static String COMPRESSION = - Compression.Algorithm.NONE.getName(); + public static Algorithm COMPRESSION = + Compression.Algorithm.NONE; static HBaseTestingUtility util = new HBaseTestingUtility(); //used by secure subclass @@ -260,10 +262,12 @@ public class TestLoadIncrementalHFiles { byte[] family, byte[] qualifier, byte[] startKey, byte[] endKey, int numRows) throws IOException { + HFileContext meta = new HFileContext(); + meta.setBlocksize(BLOCKSIZE); + meta.setCompressAlgo(COMPRESSION); HFile.Writer writer = HFile.getWriterFactory(configuration, new CacheConfig(configuration)) .withPath(fs, path) - .withBlockSize(BLOCKSIZE) - .withCompression(COMPRESSION) + .withFileContext(meta) .create(); long now = System.currentTimeMillis(); try { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java index cc510afe128..501bcf4ce43 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/CreateRandomStoreFile.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.util.BloomFilterFactory; import org.apache.hadoop.io.BytesWritable; @@ -182,14 +183,15 @@ public class CreateRandomStoreFile { Integer.valueOf(cmdLine.getOptionValue(INDEX_BLOCK_SIZE_OPTION))); } + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(compr); + meta.setBlocksize(blockSize); StoreFile.Writer sfw = new StoreFile.WriterBuilder(conf, - new CacheConfig(conf), fs, blockSize) + new CacheConfig(conf), fs) .withOutputDir(outputDir) - .withCompression(compr) .withBloomType(bloomType) .withMaxKeyCount(numKV) - .withChecksumType(HFile.DEFAULT_CHECKSUM_TYPE) - .withBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM) + .withFileContext(meta) .build(); rand = new Random(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java index 4edb4298f87..245fcca1fc3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/DataBlockEncodingTool.java @@ -45,8 +45,10 @@ import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import 
org.apache.hadoop.hbase.io.encoding.DataBlockEncoder; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.encoding.EncodedDataBlock; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFileBlock; +import org.apache.hadoop.hbase.io.hfile.HFileReaderV2; import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.compress.CompressionOutputStream; @@ -122,7 +124,7 @@ public class DataBlockEncodingTool { private long totalCFLength = 0; private byte[] rawKVs; - private int minorVersion = 0; + private boolean useHBaseChecksum = false; private final String compressionAlgorithmName; private final Algorithm compressionAlgorithm; @@ -206,13 +208,17 @@ public class DataBlockEncodingTool { } rawKVs = uncompressedOutputStream.toByteArray(); - + boolean useTag = (currentKV.getTagsLength() > 0); for (DataBlockEncoding encoding : encodings) { if (encoding == DataBlockEncoding.NONE) { continue; } DataBlockEncoder d = encoding.getEncoder(); - codecs.add(new EncodedDataBlock(d, includesMemstoreTS, encoding, rawKVs)); + HFileContext meta = new HFileContext(); + meta.setCompressAlgo(Compression.Algorithm.NONE); + meta.setIncludesMvcc(includesMemstoreTS); + meta.setIncludesTags(useTag); + codecs.add(new EncodedDataBlock(d, encoding, rawKVs, meta )); } } @@ -232,7 +238,7 @@ public class DataBlockEncodingTool { List> codecIterators = new ArrayList>(); for(EncodedDataBlock codec : codecs) { - codecIterators.add(codec.getIterator(HFileBlock.headerSize(minorVersion))); + codecIterators.add(codec.getIterator(HFileBlock.headerSize(useHBaseChecksum))); } int j = 0; @@ -325,7 +331,7 @@ public class DataBlockEncodingTool { Iterator it; - it = codec.getIterator(HFileBlock.headerSize(minorVersion)); + it = codec.getIterator(HFileBlock.headerSize(useHBaseChecksum)); // count only the algorithm time, without memory allocations // (expect first time) @@ -595,7 +601,9 @@ public class DataBlockEncodingTool { // run the utilities DataBlockEncodingTool comp = new DataBlockEncodingTool(compressionName); - comp.minorVersion = reader.getHFileMinorVersion(); + int majorVersion = reader.getHFileVersion(); + comp.useHBaseChecksum = majorVersion > 2 + || (majorVersion == 2 && reader.getHFileMinorVersion() >= HFileReaderV2.MINOR_VERSION_WITH_CHECKSUM); comp.checkStatistics(scanner, kvLimit); if (doVerify) { comp.verifyCodecs(scanner, kvLimit); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/HFileReadWriteTest.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/HFileReadWriteTest.java index 402c3f41669..5e300a4da54 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/HFileReadWriteTest.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/HFileReadWriteTest.java @@ -350,7 +350,7 @@ public class HFileReadWriteTest { HRegion region = new HRegion(outputDir, null, fs, conf, regionInfo, htd, null); HStore store = new HStore(region, columnDescriptor, conf); - StoreFile.Writer writer = store.createWriterInTmp(maxKeyCount, compression, false, true); + StoreFile.Writer writer = store.createWriterInTmp(maxKeyCount, compression, false, true, false); StatisticsPrinter statsPrinter = new StatisticsPrinter(); statsPrinter.startThread(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java index ea37a38dbaa..bd98eb88bb4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCacheOnWriteInSchema.java @@ -199,7 +199,7 @@ public class TestCacheOnWriteInSchema { public void testCacheOnWriteInSchema() throws IOException { // Write some random data into the store StoreFile.Writer writer = store.createWriterInTmp(Integer.MAX_VALUE, - HFile.DEFAULT_COMPRESSION_ALGORITHM, false, true); + HFile.DEFAULT_COMPRESSION_ALGORITHM, false, true, false); writeStoreFile(writer); writer.close(); // Verify the block types of interest were cached on write diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java index 128d7449dff..8e95e49ac8f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hbase.*; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.hfile.BlockCache; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.io.hfile.HFile; import org.apache.hadoop.hbase.io.hfile.TestHFileWriterV2; @@ -291,13 +292,12 @@ public class TestCompoundBloomFilter { BLOOM_BLOCK_SIZES[t]); conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true); cacheConf = new CacheConfig(conf); - - StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs, - BLOCK_SIZES[t]) + HFileContext meta = new HFileContext(); + meta.setBlocksize(BLOCK_SIZES[t]); + StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs) .withOutputDir(TEST_UTIL.getDataTestDir()) .withBloomType(bt) - .withChecksumType(HFile.DEFAULT_CHECKSUM_TYPE) - .withBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM) + .withFileContext(meta) .build(); assertTrue(w.hasGeneralBloom()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java index cf94ec1ddad..873476469e7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java @@ -42,6 +42,7 @@ import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileScanner; import org.apache.hadoop.hbase.io.hfile.NoOpDataBlockEncoder; import org.apache.hadoop.hbase.util.Bytes; @@ -72,9 +73,12 @@ public class TestFSErrorsExposed { FaultyFileSystem faultyfs = new FaultyFileSystem(hfs.getBackingFs()); FileSystem fs = new HFileSystem(faultyfs); CacheConfig cacheConf = new CacheConfig(util.getConfiguration()); + HFileContext meta = new HFileContext(); + meta.setBlocksize(2*1024); StoreFile.Writer writer = new StoreFile.WriterBuilder( - util.getConfiguration(), cacheConf, hfs, 
2*1024) + util.getConfiguration(), cacheConf, hfs) .withOutputDir(hfilePath) + .withFileContext(meta) .build(); TestStoreFile.writeStoreFile( writer, Bytes.toBytes("cf"), Bytes.toBytes("qual")); @@ -121,9 +125,12 @@ public class TestFSErrorsExposed { FaultyFileSystem faultyfs = new FaultyFileSystem(hfs.getBackingFs()); HFileSystem fs = new HFileSystem(faultyfs); CacheConfig cacheConf = new CacheConfig(util.getConfiguration()); + HFileContext meta = new HFileContext(); + meta.setBlocksize(2 * 1024); StoreFile.Writer writer = new StoreFile.WriterBuilder( - util.getConfiguration(), cacheConf, hfs, 2 * 1024) + util.getConfiguration(), cacheConf, hfs) .withOutputDir(hfilePath) + .withFileContext(meta) .build(); TestStoreFile.writeStoreFile( writer, Bytes.toBytes("cf"), Bytes.toBytes("qual")); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index 7a09c3a20a1..6c15238bd3a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -2356,21 +2356,6 @@ public class TestHRegion extends HBaseTestCase { } } - private void assertICV(byte [] row, - byte [] familiy, - byte[] qualifier, - long amount) throws IOException { - // run a get and see? - Get get = new Get(row); - get.addColumn(familiy, qualifier); - Result result = region.get(get); - assertEquals(1, result.size()); - - Cell kv = result.rawCells()[0]; - long r = Bytes.toLong(CellUtil.cloneValue(kv)); - assertEquals(amount, r); - } - private void assertICV(byte [] row, byte [] familiy, byte[] qualifier, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java index a3218c53834..adf06f4e485 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegionServerBulkLoad.java @@ -40,8 +40,10 @@ import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.TableExistsException; import org.apache.hadoop.hbase.io.compress.Compression; +import org.apache.hadoop.hbase.io.compress.Compression.Algorithm; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.protobuf.RequestConverter; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos; import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.CompactRegionRequest; @@ -65,7 +67,7 @@ public class TestHRegionServerBulkLoad { private final static byte[] QUAL = Bytes.toBytes("qual"); private final static int NUM_CFS = 10; public static int BLOCKSIZE = 64 * 1024; - public static String COMPRESSION = Compression.Algorithm.NONE.getName(); + public static Algorithm COMPRESSION = Compression.Algorithm.NONE; private final static byte[][] families = new byte[NUM_CFS][]; static { @@ -87,11 +89,13 @@ public class TestHRegionServerBulkLoad { */ public static void createHFile(FileSystem fs, Path path, byte[] family, byte[] qualifier, byte[] value, int numRows) throws IOException { + HFileContext context = new HFileContext(); + context.setBlocksize(BLOCKSIZE); + 
context.setCompressAlgo(COMPRESSION); HFile.Writer writer = HFile .getWriterFactory(conf, new CacheConfig(conf)) .withPath(fs, path) - .withBlockSize(BLOCKSIZE) - .withCompression(COMPRESSION) + .withFileContext(context) .create(); long now = System.currentTimeMillis(); try { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java index 8e29590d8ea..85d087f90fb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStore.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; @@ -180,7 +181,7 @@ public class TestStore extends TestCase { init(getName(), conf, hcd); // Test createWriterInTmp() - StoreFile.Writer writer = store.createWriterInTmp(4, hcd.getCompression(), false, true); + StoreFile.Writer writer = store.createWriterInTmp(4, hcd.getCompression(), false, true, false); Path path = writer.getPath(); writer.append(new KeyValue(row, family, qf1, Bytes.toBytes(1))); writer.append(new KeyValue(row, family, qf2, Bytes.toBytes(2))); @@ -320,9 +321,12 @@ public class TestStore extends TestCase { long seqid = f.getMaxSequenceId(); Configuration c = HBaseConfiguration.create(); FileSystem fs = FileSystem.get(c); + HFileContext meta = new HFileContext(); + meta.setBlocksize(StoreFile.DEFAULT_BLOCKSIZE_SMALL); StoreFile.Writer w = new StoreFile.WriterBuilder(c, new CacheConfig(c), - fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL) + fs) .withOutputDir(storedir) + .withFileContext(meta) .build(); w.appendMetadata(seqid + 1, false); w.close(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java index f99a9fd01ba..cf9ede7bb3f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java @@ -33,19 +33,20 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseTestCase; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.HFileLink; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.BlockCache; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.CacheStats; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; import 
org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl; import org.apache.hadoop.hbase.io.hfile.HFileScanner; @@ -95,8 +96,11 @@ public class TestStoreFile extends HBaseTestCase { HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem( conf, fs, new Path(this.testDir, hri.getTable().getNameAsString()), hri); - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 2 * 1024) + HFileContext meta = new HFileContext(); + meta.setBlocksize(2 * 1024); + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(regionFs.createTempName()) + .withFileContext(meta) .build(); writeStoreFile(writer); @@ -144,9 +148,12 @@ public class TestStoreFile extends HBaseTestCase { HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem( conf, fs, new Path(this.testDir, hri.getTable().getNameAsString()), hri); + HFileContext meta = new HFileContext(); + meta.setBlocksize(8 * 1024); // Make a store file and write data to it. - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs, 8 * 1024) + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(regionFs.createTempName()) + .withFileContext(meta) .build(); writeStoreFile(writer); @@ -187,10 +194,13 @@ public class TestStoreFile extends HBaseTestCase { FSUtils.setRootDir(testConf, this.testDir); HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem( testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()), hri); + HFileContext meta = new HFileContext(); + meta.setBlocksize(8 * 1024); // Make a store file and write data to it. - StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs, 8 * 1024) + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(regionFs.createTempName()) + .withFileContext(meta) .build(); writeStoreFile(writer); @@ -230,9 +240,12 @@ public class TestStoreFile extends HBaseTestCase { HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem( testConf, fs, FSUtils.getTableDir(this.testDir, hri.getTable()), hri); + HFileContext meta = new HFileContext(); + meta.setBlocksize(8 * 1024); // Make a store file and write data to it. //// - StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs, 8 * 1024) + StoreFile.Writer writer = new StoreFile.WriterBuilder(testConf, cacheConf, this.fs) .withFilePath(regionFs.createTempName()) + .withFileContext(meta) .build(); writeStoreFile(writer); Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath()); @@ -490,13 +503,16 @@ public class TestStoreFile extends HBaseTestCase { // write the file Path f = new Path(ROOT_DIR, getName()); - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs, - StoreFile.DEFAULT_BLOCKSIZE_SMALL) + HFileContext meta = new HFileContext(); + meta.setBlocksize(StoreFile.DEFAULT_BLOCKSIZE_SMALL); + meta.setChecksumType(CKTYPE); + meta.setBytesPerChecksum(CKBYTES); + // Make a store file and write data to it. 
+ StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) .withBloomType(BloomType.ROW) .withMaxKeyCount(2000) - .withChecksumType(CKTYPE) - .withBytesPerChecksum(CKBYTES) + .withFileContext(meta) .build(); bloomWriteRead(writer, fs); } @@ -510,12 +526,15 @@ public class TestStoreFile extends HBaseTestCase { // write the file Path f = new Path(ROOT_DIR, getName()); - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, - fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL) + HFileContext meta = new HFileContext(); + meta.setBlocksize(StoreFile.DEFAULT_BLOCKSIZE_SMALL); + meta.setChecksumType(CKTYPE); + meta.setBytesPerChecksum(CKBYTES); + // Make a store file and write data to it. + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) .withMaxKeyCount(2000) - .withChecksumType(CKTYPE) - .withBytesPerChecksum(CKBYTES) + .withFileContext(meta) .build(); // add delete family @@ -563,11 +582,12 @@ public class TestStoreFile extends HBaseTestCase { public void testReseek() throws Exception { // write the file Path f = new Path(ROOT_DIR, getName()); - + HFileContext meta = new HFileContext(); + meta.setBlocksize(8 * 1024); // Make a store file and write data to it. - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, - this.fs, 8 * 1024) + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) + .withFileContext(meta) .build(); writeStoreFile(writer); @@ -606,13 +626,16 @@ public class TestStoreFile extends HBaseTestCase { for (int x : new int[]{0,1}) { // write the file Path f = new Path(ROOT_DIR, getName() + x); - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, - fs, StoreFile.DEFAULT_BLOCKSIZE_SMALL) + HFileContext meta = new HFileContext(); + meta.setBlocksize(StoreFile.DEFAULT_BLOCKSIZE_SMALL); + meta.setChecksumType(CKTYPE); + meta.setBytesPerChecksum(CKBYTES); + // Make a store file and write data to it. + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(f) .withBloomType(bt[x]) .withMaxKeyCount(expKeys[x]) - .withChecksumType(CKTYPE) - .withBytesPerChecksum(CKBYTES) + .withFileContext(meta) .build(); long now = System.currentTimeMillis(); @@ -759,9 +782,12 @@ public class TestStoreFile extends HBaseTestCase { // Make up a directory hierarchy that has a regiondir ("7e0102") and familyname. Path storedir = new Path(new Path(this.testDir, "7e0102"), "familyname"); Path dir = new Path(storedir, "1234567890"); - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, - this.fs, 8 * 1024) + HFileContext meta = new HFileContext(); + meta.setBlocksize(8 * 1024); + // Make a store file and write data to it. + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withOutputDir(dir) + .withFileContext(meta) .build(); List kvList = getKeyValueSet(timestamps,numRows, @@ -943,11 +969,15 @@ public class TestStoreFile extends HBaseTestCase { totalSize += kv.getLength() + 1; } int blockSize = totalSize / numBlocks; - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs, blockSize) + HFileContext meta = new HFileContext(); + meta.setBlocksize(blockSize); + meta.setChecksumType(CKTYPE); + meta.setBytesPerChecksum(CKBYTES); + // Make a store file and write data to it. 
+ StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(path) .withMaxKeyCount(2000) - .withChecksumType(CKTYPE) - .withBytesPerChecksum(CKBYTES) + .withFileContext(meta) .build(); // We'll write N-1 KVs to ensure we don't write an extra block kvs.remove(kvs.size()-1); @@ -975,13 +1005,17 @@ public class TestStoreFile extends HBaseTestCase { dataBlockEncoderAlgo, dataBlockEncoderAlgo); cacheConf = new CacheConfig(conf); - StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, fs, - HConstants.DEFAULT_BLOCKSIZE) + HFileContext meta = new HFileContext(); + meta.setBlocksize(HConstants.DEFAULT_BLOCKSIZE); + meta.setChecksumType(CKTYPE); + meta.setBytesPerChecksum(CKBYTES); + meta.setEncodingOnDisk(dataBlockEncoderAlgo); + meta.setEncodingInCache(dataBlockEncoderAlgo); + // Make a store file and write data to it. + StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConf, this.fs) .withFilePath(path) - .withDataBlockEncoder(dataBlockEncoder) .withMaxKeyCount(2000) - .withChecksumType(CKTYPE) - .withBytesPerChecksum(CKBYTES) + .withFileContext(meta) .build(); writer.close(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java new file mode 100644 index 00000000000..eaf128fadf1 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestTags.java @@ -0,0 +1,466 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.NavigableMap; +import java.util.TreeMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellScanner; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.HBaseAdmin; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; +import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; +import org.apache.hadoop.hbase.regionserver.wal.WALEdit; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +/** + * Class that test tags + */ +@Category(MediumTests.class) +public class TestTags { + static boolean useFilter = false; + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + @BeforeClass + public static void setUpBeforeClass() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + conf.setInt("hfile.format.version", 3); + conf.setStrings(CoprocessorHost.USER_REGION_COPROCESSOR_CONF_KEY, + TestCoprocessorForTags.class.getName()); + TEST_UTIL.startMiniCluster(1, 2); + } + + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @After + public void tearDown() { + useFilter = false; + } + + @Test + public void testTags() throws Exception { + HTable table = null; + try { + TableName tableName = TableName.valueOf("testTags"); + byte[] fam = Bytes.toBytes("info"); + byte[] row = Bytes.toBytes("rowa"); + // column names + byte[] qual = Bytes.toBytes("qual"); + + byte[] row1 = Bytes.toBytes("rowb"); + + byte[] row2 = Bytes.toBytes("rowc"); + + HTableDescriptor desc = new HTableDescriptor(tableName); + HColumnDescriptor colDesc = new HColumnDescriptor(fam); + colDesc.setBlockCacheEnabled(true); + // colDesc.setDataBlockEncoding(DataBlockEncoding.NONE); + colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE); + desc.addFamily(colDesc); + HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); + admin.createTable(desc); + byte[] value = Bytes.toBytes("value"); + table = new HTable(TEST_UTIL.getConfiguration(), tableName); + Put put = new Put(row); + put.add(fam, qual, HConstants.LATEST_TIMESTAMP, value); + put.setAttribute("visibility", Bytes.toBytes("myTag")); + table.put(put); + 
admin.flush(tableName.getName()); + List regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 0)) { + Thread.sleep(10); + } + } + + Put put1 = new Put(row1); + byte[] value1 = Bytes.toBytes("1000dfsdf"); + put1.add(fam, qual, HConstants.LATEST_TIMESTAMP, value1); + // put1.setAttribute("visibility", Bytes.toBytes("myTag3")); + table.put(put1); + admin.flush(tableName.getName()); + regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 1)) { + Thread.sleep(10); + } + } + + Put put2 = new Put(row2); + byte[] value2 = Bytes.toBytes("1000dfsdf"); + put2.add(fam, qual, HConstants.LATEST_TIMESTAMP, value2); + put2.setAttribute("visibility", Bytes.toBytes("myTag3")); + table.put(put2); + + admin.flush(tableName.getName()); + regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 2)) { + Thread.sleep(10); + } + } + result(fam, row, qual, row2, table, value, value2, row1, value1); + admin.compact(tableName.getName()); + while(admin.getCompactionState(tableName.getName()) != CompactionState.NONE) { + Thread.sleep(10); + } + result(fam, row, qual, row2, table, value, value2, row1, value1); + } finally { + if (table != null) { + table.close(); + } + } + } + + @Test + public void testFlushAndCompactionWithoutTags() throws Exception { + HTable table = null; + try { + TableName tableName = TableName.valueOf("testFlushAndCompactionWithoutTags"); + byte[] fam = Bytes.toBytes("info"); + byte[] row = Bytes.toBytes("rowa"); + // column names + byte[] qual = Bytes.toBytes("qual"); + + byte[] row1 = Bytes.toBytes("rowb"); + + byte[] row2 = Bytes.toBytes("rowc"); + + HTableDescriptor desc = new HTableDescriptor(tableName); + HColumnDescriptor colDesc = new HColumnDescriptor(fam); + colDesc.setBlockCacheEnabled(true); + // colDesc.setDataBlockEncoding(DataBlockEncoding.NONE); + colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE); + desc.addFamily(colDesc); + HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); + admin.createTable(desc); + + table = new HTable(TEST_UTIL.getConfiguration(), tableName); + Put put = new Put(row); + byte[] value = Bytes.toBytes("value"); + put.add(fam, qual, HConstants.LATEST_TIMESTAMP, value); + table.put(put); + admin.flush(tableName.getName()); + List regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 0)) { + Thread.sleep(10); + } + } + + Put put1 = new Put(row1); + byte[] value1 = Bytes.toBytes("1000dfsdf"); + put1.add(fam, qual, HConstants.LATEST_TIMESTAMP, value1); + table.put(put1); + admin.flush(tableName.getName()); + regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 1)) { + Thread.sleep(10); + } + } + + Put put2 = new Put(row2); + byte[] value2 = Bytes.toBytes("1000dfsdf"); + put2.add(fam, qual, HConstants.LATEST_TIMESTAMP, value2); + table.put(put2); + + admin.flush(tableName.getName()); + regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + 
while(!(store.getStorefilesCount() > 2)) { + Thread.sleep(10); + } + } + Scan s = new Scan(row); + ResultScanner scanner = table.getScanner(s); + try { + Result[] next = scanner.next(3); + for (Result result : next) { + CellScanner cellScanner = result.cellScanner(); + boolean advance = cellScanner.advance(); + KeyValue current = (KeyValue) cellScanner.current(); + assertTrue(current.getValueOffset() + current.getValueLength() == current.getLength()); + } + } finally { + if (scanner != null) + scanner.close(); + } + admin.compact(tableName.getName()); + while(admin.getCompactionState(tableName.getName()) != CompactionState.NONE) { + Thread.sleep(10); + } + s = new Scan(row); + scanner = table.getScanner(s); + try { + Result[] next = scanner.next(3); + for (Result result : next) { + CellScanner cellScanner = result.cellScanner(); + boolean advance = cellScanner.advance(); + KeyValue current = (KeyValue) cellScanner.current(); + assertTrue(current.getValueOffset() + current.getValueLength() == current.getLength()); + } + } finally { + if (scanner != null) { + scanner.close(); + } + } + } finally { + if (table != null) { + table.close(); + } + } + } + @Test + public void testFlushAndCompactionwithCombinations() throws Exception { + HTable table = null; + try { + TableName tableName = TableName.valueOf("testFlushAndCompactionwithCombinations"); + byte[] fam = Bytes.toBytes("info"); + byte[] row = Bytes.toBytes("rowa"); + // column names + byte[] qual = Bytes.toBytes("qual"); + + byte[] row1 = Bytes.toBytes("rowb"); + + byte[] row2 = Bytes.toBytes("rowc"); + byte[] rowd = Bytes.toBytes("rowd"); + byte[] rowe = Bytes.toBytes("rowe"); + + HTableDescriptor desc = new HTableDescriptor(tableName); + HColumnDescriptor colDesc = new HColumnDescriptor(fam); + colDesc.setBlockCacheEnabled(true); + // colDesc.setDataBlockEncoding(DataBlockEncoding.NONE); + colDesc.setDataBlockEncoding(DataBlockEncoding.PREFIX_TREE); + desc.addFamily(colDesc); + HBaseAdmin admin = TEST_UTIL.getHBaseAdmin(); + admin.createTable(desc); + + table = new HTable(TEST_UTIL.getConfiguration(), tableName); + Put put = new Put(row); + byte[] value = Bytes.toBytes("value"); + Tag[] tags = new Tag[1]; + tags[0] = new Tag((byte) 1, "ram"); + put.add(fam, qual, HConstants.LATEST_TIMESTAMP, value, tags); + // put.setAttribute("visibility", Bytes.toBytes("myTag")); + table.put(put); + Put put1 = new Put(row1); + byte[] value1 = Bytes.toBytes("1000dfsdf"); + put1.add(fam, qual, HConstants.LATEST_TIMESTAMP, value1); + table.put(put1); + admin.flush(tableName.getName()); + List regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 0)) { + Thread.sleep(10); + } + } + + put1 = new Put(row2); + value1 = Bytes.toBytes("1000dfsdf"); + put1.add(fam, qual, HConstants.LATEST_TIMESTAMP, value1); + table.put(put1); + admin.flush(tableName.getName()); + regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 1)) { + Thread.sleep(10); + } + } + Put put2 = new Put(rowd); + byte[] value2 = Bytes.toBytes("1000dfsdf"); + put2.add(fam, qual, HConstants.LATEST_TIMESTAMP, value2); + table.put(put2); + put2 = new Put(rowe); + value2 = Bytes.toBytes("1000dfsddfdf"); + put2.add(fam, qual, HConstants.LATEST_TIMESTAMP, value2, tags); + // put2.setAttribute("visibility", Bytes.toBytes("myTag3")); + table.put(put2); 
+ admin.flush(tableName.getName()); + regions = TEST_UTIL.getHBaseCluster().getRegions(tableName.getName()); + for(HRegion region : regions) { + Store store = region.getStore(fam); + while(!(store.getStorefilesCount() > 2)) { + Thread.sleep(10); + } + } + Scan s = new Scan(row); + ResultScanner scanner = table.getScanner(s); + try { + Result[] next = scanner.next(5); + for (Result result : next) { + CellScanner cellScanner = result.cellScanner(); + boolean advance = cellScanner.advance(); + KeyValue current = (KeyValue) cellScanner.current(); + // System.out.println(current); + int tagsLength = current.getTagsLength(); + if (tagsLength == 0) { + assertTrue(current.getValueOffset() + current.getValueLength() == current.getLength()); + } else { + // even if taglength is going to be > 0 the byte array would be same + assertTrue(current.getValueOffset() + current.getValueLength() != current.getLength()); + } + } + } finally { + if (scanner != null) { + scanner.close(); + } + } + while(admin.getCompactionState(tableName.getName()) != CompactionState.NONE) { + Thread.sleep(10); + } + s = new Scan(row); + scanner = table.getScanner(s); + try { + Result[] next = scanner.next(5); + for (Result result : next) { + CellScanner cellScanner = result.cellScanner(); + boolean advance = cellScanner.advance(); + KeyValue current = (KeyValue) cellScanner.current(); + // System.out.println(current); + if (current.getTagsLength() == 0) { + assertTrue(current.getValueOffset() + current.getValueLength() == current.getLength()); + } else { + // even if taglength is going to be > 0 the byte array would be same + assertTrue(current.getValueOffset() + current.getValueLength() != current.getLength()); + } + } + } finally { + if (scanner != null) { + scanner.close(); + } + } + } finally { + if (table != null) { + table.close(); + } + } + } + + private void result(byte[] fam, byte[] row, byte[] qual, byte[] row2, HTable table, byte[] value, + byte[] value2, byte[] row1, byte[] value1) throws IOException { + Scan s = new Scan(row); + // If filters are used this attribute can be specifically check for in + // filterKV method and + // kvs can be filtered out if the tags of interest is not found in that kv + s.setAttribute("visibility", Bytes.toBytes("myTag")); + ResultScanner scanner = null; + try { + scanner = table.getScanner(s); + Result next = scanner.next(); + CellScanner cellScanner = next.cellScanner(); + boolean advance = cellScanner.advance(); + KeyValue current = (KeyValue) cellScanner.current(); + + assertTrue(Bytes.equals(next.getRow(), row)); + assertTrue(Bytes.equals(next.getValue(fam, qual), value)); + + Result next2 = scanner.next(); + assertTrue(next2 != null); + assertTrue(Bytes.equals(next2.getRow(), row1)); + assertTrue(Bytes.equals(next2.getValue(fam, qual), value1)); + + next2 = scanner.next(); + assertTrue(next2 != null); + assertTrue(Bytes.equals(next2.getRow(), row2)); + assertTrue(Bytes.equals(next2.getValue(fam, qual), value2)); + + } finally { + if (scanner != null) + scanner.close(); + } + } + + public static class TestCoprocessorForTags extends BaseRegionObserver { + @Override + public void prePut(final ObserverContext e, final Put put, + final WALEdit edit, final Durability durability) throws IOException { + byte[] attribute = put.getAttribute("visibility"); + byte[] cf = null; + List updatedCells = new ArrayList(); + if (attribute != null) { + for (List edits : put.getFamilyCellMap().values()) { + for (Cell cell : edits) { + KeyValue kv = KeyValueUtil.ensureKeyValue(cell); + if (cf == null) 
{ + cf = kv.getFamily(); + } + Tag tag = new Tag((byte) 1, attribute); + List tagList = new ArrayList(); + tagList.add(tag); + + KeyValue newKV = new KeyValue(kv.getRow(), 0, kv.getRowLength(), kv.getFamily(), 0, + kv.getFamilyLength(), kv.getQualifier(), 0, kv.getQualifierLength(), + kv.getTimestamp(), KeyValue.Type.codeToType(kv.getType()), kv.getValue(), 0, + kv.getValueLength(), tagList); + ((List) updatedCells).add(newKV); + } + } + // add new set of familymap to the put. Can we update the existing kvs + // itself + NavigableMap> familyMap = new TreeMap>( + Bytes.BYTES_COMPARATOR); + put.getFamilyCellMap().remove(cf); + // Update the family map + put.getFamilyCellMap().put(cf, updatedCells); + } + } + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestKeyValueCompression.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestKeyValueCompression.java index ba570565032..afd05898973 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestKeyValueCompression.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestKeyValueCompression.java @@ -19,10 +19,13 @@ package org.apache.hadoop.hbase.regionserver.wal; import java.io.ByteArrayInputStream; import java.io.DataInputStream; +import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.io.DataOutputBuffer; import org.junit.Test; @@ -78,4 +81,33 @@ public class TestKeyValueCompression { assertEquals(kv, readBack); } } + + @Test + public void testKVWithTags() throws Exception { + CompressionContext ctx = new CompressionContext(LRUDictionary.class, false); + DataOutputBuffer buf = new DataOutputBuffer(BUF_SIZE); + KeyValueCompression.writeKV(buf, createKV(1), ctx); + KeyValueCompression.writeKV(buf, createKV(0), ctx); + KeyValueCompression.writeKV(buf, createKV(2), ctx); + + ctx.clear(); + DataInputStream in = new DataInputStream(new ByteArrayInputStream( + buf.getData(), 0, buf.getLength())); + + KeyValue readBack = KeyValueCompression.readKV(in, ctx); + List tags = readBack.getTags(); + assertEquals(1, tags.size()); + } + + private KeyValue createKV(int noOfTags) { + byte[] row = Bytes.toBytes("myRow"); + byte[] cf = Bytes.toBytes("myCF"); + byte[] q = Bytes.toBytes("myQualifier"); + byte[] value = Bytes.toBytes("myValue"); + List tags = new ArrayList(noOfTags); + for (int i = 1; i <= noOfTags; i++) { + tags.add(new Tag((byte) i, Bytes.toBytes("tagValue" + i))); + } + return new KeyValue(row, cf, q, HConstants.LATEST_TIMESTAMP, value, tags); + } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALCellCodecWithCompression.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALCellCodecWithCompression.java new file mode 100644 index 00000000000..e3c1cc82298 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALCellCodecWithCompression.java @@ -0,0 +1,83 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver.wal; + +import static org.junit.Assert.assertEquals; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.codec.Codec.Decoder; +import org.apache.hadoop.hbase.codec.Codec.Encoder; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category(SmallTests.class) +public class TestWALCellCodecWithCompression { + + @Test + public void testEncodeDecodeKVsWithTags() throws Exception { + WALCellCodec codec = new WALCellCodec(new Configuration(false), new CompressionContext( + LRUDictionary.class, false)); + ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + Encoder encoder = codec.getEncoder(bos); + encoder.write(createKV(1)); + encoder.write(createKV(0)); + encoder.write(createKV(2)); + + InputStream is = new ByteArrayInputStream(bos.toByteArray()); + Decoder decoder = codec.getDecoder(is); + decoder.advance(); + KeyValue kv = (KeyValue) decoder.current(); + List tags = kv.getTags(); + assertEquals(1, tags.size()); + assertEquals("tagValue1", Bytes.toString(tags.get(0).getValue())); + decoder.advance(); + kv = (KeyValue) decoder.current(); + tags = kv.getTags(); + assertEquals(0, tags.size()); + decoder.advance(); + kv = (KeyValue) decoder.current(); + tags = kv.getTags(); + assertEquals(2, tags.size()); + assertEquals("tagValue1", Bytes.toString(tags.get(0).getValue())); + assertEquals("tagValue2", Bytes.toString(tags.get(1).getValue())); + } + + private KeyValue createKV(int noOfTags) { + byte[] row = Bytes.toBytes("myRow"); + byte[] cf = Bytes.toBytes("myCF"); + byte[] q = Bytes.toBytes("myQualifier"); + byte[] value = Bytes.toBytes("myValue"); + List tags = new ArrayList(noOfTags); + for (int i = 1; i <= noOfTags; i++) { + tags.add(new Tag((byte) i, Bytes.toBytes("tagValue" + i))); + } + return new KeyValue(row, cf, q, HConstants.LATEST_TIMESTAMP, value, tags); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java index ea78743fdf9..8d03c932abb 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/wal/TestWALReplay.java @@ -59,6 +59,7 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import 
org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.regionserver.DefaultStoreEngine; @@ -326,8 +327,10 @@ public class TestWALReplay { HLog wal = createWAL(this.conf); HRegion region = HRegion.openHRegion(hri, htd, wal, this.conf); Path f = new Path(basedir, "hfile"); + HFileContext context = new HFileContext(); HFile.Writer writer = - HFile.getWriterFactoryNoCache(conf).withPath(fs, f).create(); + HFile.getWriterFactoryNoCache(conf).withPath(fs, f) + .withFileContext(context).create(); byte [] family = htd.getFamilies().iterator().next().getName(); byte [] row = tableName.getName(); writer.append(new KeyValue(row, family, family, row)); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java index 20e43d40315..8f4aaaa5b8b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/access/TestAccessController.java @@ -67,6 +67,7 @@ import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; import org.apache.hadoop.hbase.coprocessor.RegionServerCoprocessorEnvironment; import org.apache.hadoop.hbase.io.hfile.CacheConfig; import org.apache.hadoop.hbase.io.hfile.HFile; +import org.apache.hadoop.hbase.io.hfile.HFileContext; import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles; import org.apache.hadoop.hbase.master.MasterCoprocessorHost; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; @@ -842,8 +843,10 @@ public class TestAccessController extends SecureTestUtil { HFile.Writer writer = null; long now = System.currentTimeMillis(); try { + HFileContext context = new HFileContext(); writer = HFile.getWriterFactory(conf, new CacheConfig(conf)) .withPath(fs, path) + .withFileContext(context) .create(); // subtract 2 since numRows doesn't include boundary keys for (byte[] key : Bytes.iterateOnSplits(startKey, endKey, true, numRows-2)) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java index 229470ffdc6..14234c30059 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/LoadTestTool.java @@ -21,18 +21,19 @@ import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Random; import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.cli.CommandLine; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.PerformanceEvaluation; +import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.HBaseAdmin; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; @@ -101,6 +102,13 @@ public class LoadTestTool extends AbstractHBaseTool { public static final String OPT_INMEMORY = "in_memory"; public static final String OPT_USAGE_IN_MEMORY = 
"Tries to keep the HFiles of the CF " + "inmemory as far as possible. Not guaranteed that reads are always served from inmemory"; + public static final String OPT_USETAGS = "usetags"; + public static final String OPT_USAGE_USETAG = "Adds tags with every KV. This option would be used" + + " only if the HFileV3 version is used"; + + public static final String OPT_NUM_TAGS = "num_tags"; + public static final String OPT_USAGE_NUM_TAGS = "Specifies the minimum and number of tags to be" + + " added per KV"; protected static final String OPT_KEY_WINDOW = "key_window"; protected static final String OPT_WRITE = "write"; @@ -136,6 +144,9 @@ public class LoadTestTool extends AbstractHBaseTool { protected Compression.Algorithm compressAlgo; protected BloomType bloomType; private boolean inMemoryCF; + private boolean useTags; + private int minNumTags = 1; + private int maxNumTags = 1; // Writer options protected int numWriterThreads = DEFAULT_NUM_THREADS; protected int minColsPerKey, maxColsPerKey; @@ -241,6 +252,8 @@ public class LoadTestTool extends AbstractHBaseTool { "separate updates for every column in a row"); addOptNoArg(OPT_ENCODE_IN_CACHE_ONLY, OPT_ENCODE_IN_CACHE_ONLY_USAGE); addOptNoArg(OPT_INMEMORY, OPT_USAGE_IN_MEMORY); + addOptNoArg(OPT_USETAGS, OPT_USAGE_USETAG); + addOptWithArg(OPT_NUM_TAGS, OPT_USAGE_NUM_TAGS + " The default is 1:1"); addOptWithArg(OPT_NUM_KEYS, "The number of keys to read/write"); addOptWithArg(OPT_START_KEY, "The first key to read/write " + @@ -379,6 +392,19 @@ public class LoadTestTool extends AbstractHBaseTool { BloomType.valueOf(bloomStr); inMemoryCF = cmd.hasOption(OPT_INMEMORY); + useTags = cmd.hasOption(OPT_USETAGS); + if (useTags) { + if (cmd.hasOption(OPT_NUM_TAGS)) { + String[] readOpts = splitColonSeparated(OPT_NUM_TAGS, 1, 2); + int colIndex = 0; + minNumTags = parseInt(readOpts[colIndex++], 1, 100); + if (colIndex < readOpts.length) { + maxNumTags = parseInt(readOpts[colIndex++], 1, 100); + } + } + System.out.println("Using tags, number of tags per KV: min=" + minNumTags + ", max=" + + maxNumTags); + } } @@ -445,17 +471,20 @@ public class LoadTestTool extends AbstractHBaseTool { if (isWrite) { System.out.println("Starting to write data..."); - writerThreads.start(startKey, endKey, numWriterThreads); + writerThreads.start(startKey, endKey, numWriterThreads, useTags, minNumTags, maxNumTags); } if (isUpdate) { + LOG.info("Starting to mutate data..."); System.out.println("Starting to mutate data..."); - updaterThreads.start(startKey, endKey, numUpdaterThreads); + // TODO : currently append and increment operations not tested with tags + // Will update this aftet it is done + updaterThreads.start(startKey, endKey, numUpdaterThreads, true, minNumTags, maxNumTags); } if (isRead) { System.out.println("Starting to read data..."); - readerThreads.start(startKey, endKey, numReaderThreads); + readerThreads.start(startKey, endKey, numReaderThreads, useTags, 0, 0); } if (isWrite) { @@ -484,6 +513,27 @@ public class LoadTestTool extends AbstractHBaseTool { return success ? 
EXIT_SUCCESS : EXIT_FAILURE; } + static byte[] generateData(final Random r, int length) { + byte [] b = new byte [length]; + int i = 0; + + for(i = 0; i < (length-8); i += 8) { + b[i] = (byte) (65 + r.nextInt(26)); + b[i+1] = b[i]; + b[i+2] = b[i]; + b[i+3] = b[i]; + b[i+4] = b[i]; + b[i+5] = b[i]; + b[i+6] = b[i]; + b[i+7] = b[i]; + } + + byte a = (byte) (65 + r.nextInt(26)); + for(; i < length; i++) { + b[i] = a; + } + return b; + } public static void main(String[] args) { new LoadTestTool().doStaticMain(args); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java index 56ad8c3fb68..51f998fa5ef 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedAction.java @@ -68,6 +68,10 @@ public abstract class MultiThreadedAction { protected AtomicLong totalOpTimeMs = new AtomicLong(); protected boolean verbose = false; + protected boolean useTags = false; + protected int minNumTags = 1; + protected int maxNumTags = 1; + protected LoadTestDataGenerator dataGenerator = null; /** @@ -149,11 +153,14 @@ public abstract class MultiThreadedAction { this.actionLetter = actionLetter; } - public void start(long startKey, long endKey, int numThreads) - throws IOException { + public void start(long startKey, long endKey, int numThreads, boolean useTags, int minNumTags, + int maxNumTags) throws IOException { this.startKey = startKey; this.endKey = endKey; this.numThreads = numThreads; + this.useTags = useTags; + this.minNumTags = minNumTags; + this.maxNumTags = maxNumTags; (new Thread(new ProgressReporter(actionLetter))).start(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java index a32b55af8ca..e4953e8decd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedReader.java @@ -91,9 +91,9 @@ public class MultiThreadedReader extends MultiThreadedAction } @Override - public void start(long startKey, long endKey, int numThreads) - throws IOException { - super.start(startKey, endKey, numThreads); + public void start(long startKey, long endKey, int numThreads, boolean useTags, + int minNumTags, int maxNumTags) throws IOException { + super.start(startKey, endKey, numThreads, useTags, minNumTags, maxNumTags); if (verbose) { LOG.debug("Reading keys [" + startKey + ", " + endKey + ")"); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdater.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdater.java index ca36d56f3d0..e0edeaf109d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdater.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedUpdater.java @@ -79,9 +79,9 @@ public class MultiThreadedUpdater extends MultiThreadedWriterBase { } @Override - public void start(long startKey, long endKey, int numThreads) - throws IOException { - super.start(startKey, endKey, numThreads); + public void start(long startKey, long endKey, int numThreads, boolean useTags, int minNumTags, + int maxNumTags) throws IOException { + super.start(startKey, endKey, numThreads, useTags, minNumTags, maxNumTags); if (verbose) { LOG.debug("Updating 
keys [" + startKey + ", " + endKey + ")"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java index 699e19c36eb..324720f3900 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriter.java @@ -24,6 +24,7 @@ import static org.apache.hadoop.hbase.util.test.LoadTestDataGenerator.MUTATE_INF import java.io.IOException; import java.util.Arrays; import java.util.HashSet; +import java.util.Random; import java.util.Set; import org.apache.commons.logging.Log; @@ -31,6 +32,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.util.test.LoadTestDataGenerator; @@ -43,6 +45,11 @@ public class MultiThreadedWriter extends MultiThreadedWriterBase { private boolean isMultiPut = false; + private Random random = new Random(); + // TODO: Make this configurable + private int minTagLength = 16; + private int maxTagLength = 512; + public MultiThreadedWriter(LoadTestDataGenerator dataGen, Configuration conf, TableName tableName) { super(dataGen, conf, tableName, "W"); @@ -54,9 +61,9 @@ public class MultiThreadedWriter extends MultiThreadedWriterBase { } @Override - public void start(long startKey, long endKey, int numThreads) - throws IOException { - super.start(startKey, endKey, numThreads); + public void start(long startKey, long endKey, int numThreads, boolean useTags, + int minNumTags, int maxNumTags) throws IOException { + super.start(startKey, endKey, numThreads, useTags, minNumTags, maxNumTags); if (verbose) { LOG.debug("Inserting keys [" + startKey + ", " + endKey + ")"); @@ -89,9 +96,26 @@ public class MultiThreadedWriter extends MultiThreadedWriterBase { int columnCount = 0; for (byte[] cf : columnFamilies) { byte[][] columns = dataGenerator.generateColumnsForCf(rowKey, cf); + int numTags; + if (minNumTags == maxNumTags) { + numTags = minNumTags; + } else { + numTags = minNumTags + random.nextInt(maxNumTags - minNumTags); + } + Tag[] tags = new Tag[numTags]; for (byte[] column : columns) { byte[] value = dataGenerator.generateValue(rowKey, cf, column); - put.add(cf, column, value); + byte[] tag = LoadTestTool.generateData(random, + minTagLength + random.nextInt(maxTagLength - minTagLength)); + if(useTags) { + for (int n = 0; n < numTags; n++) { + Tag t = new Tag((byte) n, tag); + tags[n] = t; + } + put.add(cf, column, value, tags); + } else { + put.add(cf, column, value); + } ++columnCount; if (!isMultiPut) { insert(table, put, rowKeyBase); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriterBase.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriterBase.java index c4b8d2acb67..ffa18213907 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriterBase.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/MultiThreadedWriterBase.java @@ -83,9 +83,9 @@ public abstract class MultiThreadedWriterBase extends MultiThreadedAction { } @Override - public void start(long startKey, long endKey, int numThreads) - throws IOException { - super.start(startKey, endKey, numThreads); + public void 
start(long startKey, long endKey, int numThreads, boolean useTags, int minNumTags, + int maxNumTags) throws IOException { + super.start(startKey, endKey, numThreads, useTags, minNumTags, maxNumTags); nextKeyToWrite.set(startKey); wroteUpToKey.set(startKey - 1); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java index d26764774a2..45c5957ef0b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/RestartMetaTest.java @@ -81,7 +81,7 @@ public class RestartMetaTest extends AbstractHBaseTool { minColDataSize, maxColDataSize, minColsPerKey, maxColsPerKey, LoadTestTool.COLUMN_FAMILY); MultiThreadedWriter writer = new MultiThreadedWriter(dataGen, conf, TABLE_NAME); writer.setMultiPut(true); - writer.start(startKey, endKey, numThreads); + writer.start(startKey, endKey, numThreads, false, 0, 0); System.out.printf("Started loading data..."); writer.waitForFinish(); System.out.printf("Finished loading data..."); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java index eab23d5c28a..86b7db72df2 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadParallel.java @@ -45,8 +45,8 @@ public class TestMiniClusterLoadParallel readerThreads.linkToWriter(writerThreads); - writerThreads.start(0, numKeys, NUM_THREADS); - readerThreads.start(0, numKeys, NUM_THREADS); + writerThreads.start(0, numKeys, NUM_THREADS, false, 0, 0); + readerThreads.start(0, numKeys, NUM_THREADS, false, 0, 0); writerThreads.waitForFinish(); readerThreads.waitForFinish(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java index aed7edd696c..032bc781606 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/util/TestMiniClusterLoadSequential.java @@ -129,11 +129,11 @@ public class TestMiniClusterLoadSequential { } protected void runLoadTestOnExistingTable() throws IOException { - writerThreads.start(0, numKeys, NUM_THREADS); + writerThreads.start(0, numKeys, NUM_THREADS, false, 0, 0); writerThreads.waitForFinish(); assertEquals(0, writerThreads.getNumWriteFailures()); - readerThreads.start(0, numKeys, NUM_THREADS); + readerThreads.start(0, numKeys, NUM_THREADS, false, 0, 0); readerThreads.waitForFinish(); assertEquals(0, readerThreads.getNumReadFailures()); assertEquals(0, readerThreads.getNumReadErrors()); diff --git a/hbase-server/src/test/resources/mapred-site.xml b/hbase-server/src/test/resources/mapred-site.xml index b8949fef6a0..787ffb75511 100644 --- a/hbase-server/src/test/resources/mapred-site.xml +++ b/hbase-server/src/test/resources/mapred-site.xml @@ -31,3 +31,4 @@ -Djava.awt.headless=true +
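For reference, the client-facing surface exercised by the tests above — the Tag class plus the new Put.add(family, qualifier, ts, value, Tag[]) overloads — can be driven as in the sketch below. This is a minimal illustration, not part of the patch: the table name, rows, and tag values are made up, and it assumes a cluster configured with hfile.format.version = 3, as TestTags does.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class TagPutExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    // "demoTable", "info", "qual" and the tag payloads are illustrative only.
    HTable table = new HTable(conf, TableName.valueOf("demoTable"));
    try {
      byte[] fam = Bytes.toBytes("info");
      byte[] qual = Bytes.toBytes("qual");

      // 1) Attach a tag explicitly through the new Put.add(..., Tag[]) overload.
      Tag[] tags = new Tag[] { new Tag((byte) 1, "confidential") };
      Put p1 = new Put(Bytes.toBytes("row1"));
      p1.add(fam, qual, HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value1"), tags);
      table.put(p1);

      // 2) Or carry the information as a Put attribute and let a RegionObserver
      //    (like TestCoprocessorForTags above) rewrite it into a Tag in prePut().
      Put p2 = new Put(Bytes.toBytes("row2"));
      p2.add(fam, qual, HConstants.LATEST_TIMESTAMP, Bytes.toBytes("value2"));
      p2.setAttribute("visibility", Bytes.toBytes("myTag"));
      table.put(p2);
    } finally {
      table.close();
    }
  }
}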
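Much of the remaining test churn in this patch is the mechanical migration from per-builder settings (withBlockSize, withCompression, withChecksumType, withBytesPerChecksum) to the new HFileContext carrier object. The sketch below condenses that pattern for both HFile.Writer and StoreFile.Writer; the paths and sizes are illustrative, and only setters that appear in the diffs above are used.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.StoreFile;

public class HFileContextExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);

    // File-level knobs now live on HFileContext instead of the writer builders.
    HFileContext meta = new HFileContext();
    meta.setBlocksize(64 * 1024);                               // was .withBlockSize(...)
    meta.setCompressAlgo(Compression.Algorithm.NONE);           // was .withCompression(...)
    meta.setChecksumType(HFile.DEFAULT_CHECKSUM_TYPE);          // was .withChecksumType(...)
    meta.setBytesPerChecksum(HFile.DEFAULT_BYTES_PER_CHECKSUM); // was .withBytesPerChecksum(...)

    // Raw HFile writer; the output path is hypothetical.
    HFile.Writer hfileWriter = HFile.getWriterFactory(conf, cacheConf)
        .withPath(fs, new Path("/tmp/example.hfile"))
        .withFileContext(meta)
        .create();
    hfileWriter.close();

    // StoreFile writer; note the block-size argument is gone from WriterBuilder.
    StoreFile.Writer sfWriter = new StoreFile.WriterBuilder(conf, cacheConf, fs)
        .withOutputDir(new Path("/tmp/example-store"))
        .withBloomType(BloomType.NONE)
        .withFileContext(meta)
        .build();
    sfWriter.close();
  }
}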