HBASE-7414. Convert some HFile metadata to PB

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1430106 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Kyle Purtell 2013-01-08 01:15:05 +00:00
parent 34f53170b9
commit 176ddb066c
4 changed files with 1353 additions and 62 deletions

View File

@ -27,3 +27,19 @@ import "hbase.proto";
message FileInfoProto { message FileInfoProto {
repeated BytesBytesPair mapEntry = 1; repeated BytesBytesPair mapEntry = 1;
} }
// HFile file trailer.
// Protobuf form of the fields held by FixedFileTrailer. The writer emits this
// message length-delimited and then zero-pads it so the trailer keeps its
// fixed on-disk size. Every field is optional: on read, FixedFileTrailer only
// overwrites its own value for fields that are present.
message FileTrailerProto {
optional uint64 fileInfoOffset = 1;
optional uint64 loadOnOpenDataOffset = 2;
optional uint64 uncompressedDataIndexSize = 3;
optional uint64 totalUncompressedBytes = 4;
optional uint32 dataIndexCount = 5;
optional uint32 metaIndexCount = 6;
// 64-bit, matching the long entry count used from HFile major version 2 onwards.
optional uint64 entryCount = 7;
optional uint32 numDataIndexLevels = 8;
optional uint64 firstDataBlockOffset = 9;
optional uint64 lastDataBlockOffset = 10;
// Comparator class name, as held in FixedFileTrailer.comparatorClassName.
optional string comparatorClassName = 11;
// Ordinal of the Compression.Algorithm enum constant. When absent, the
// reader falls back to Compression.Algorithm.NONE.
optional uint32 compressionCodec = 12;
}

View File

@ -18,13 +18,10 @@
*/ */
package org.apache.hadoop.hbase.io.hfile; package org.apache.hadoop.hbase.io.hfile;
import static org.apache.hadoop.hbase.io.hfile.HFile.MAX_FORMAT_VERSION;
import static org.apache.hadoop.hbase.io.hfile.HFile.MIN_FORMAT_VERSION;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream; import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream; import java.io.DataOutputStream;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
@ -34,6 +31,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.protobuf.generated.HFileProtos;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.RawComparator;
@ -57,6 +55,9 @@ public class FixedFileTrailer {
private static final Log LOG = LogFactory.getLog(FixedFileTrailer.class); private static final Log LOG = LogFactory.getLog(FixedFileTrailer.class);
/** HFile minor version that introduced pbuf filetrailer */
private static final int PBUF_TRAILER_MINOR_VERSION = 2;
/** /**
* We store the comparator class name as a fixed-length field in the trailer. * We store the comparator class name as a fixed-length field in the trailer.
*/ */
@ -129,11 +130,10 @@ public class FixedFileTrailer {
private static int[] computeTrailerSizeByVersion() { private static int[] computeTrailerSizeByVersion() {
int versionToSize[] = new int[HFile.MAX_FORMAT_VERSION + 1]; int versionToSize[] = new int[HFile.MAX_FORMAT_VERSION + 1];
for (int version = MIN_FORMAT_VERSION; for (int version = HFile.MIN_FORMAT_VERSION;
version <= MAX_FORMAT_VERSION; version <= HFile.MAX_FORMAT_VERSION;
++version) { ++version) {
FixedFileTrailer fft = new FixedFileTrailer(version, FixedFileTrailer fft = new FixedFileTrailer(version, HFileBlock.MINOR_VERSION_NO_CHECKSUM);
HFileBlock.MINOR_VERSION_NO_CHECKSUM);
DataOutputStream dos = new DataOutputStream(new NullOutputStream()); DataOutputStream dos = new DataOutputStream(new NullOutputStream());
try { try {
fft.serialize(dos); fft.serialize(dos);
@ -148,8 +148,8 @@ public class FixedFileTrailer {
private static int getMaxTrailerSize() { private static int getMaxTrailerSize() {
int maxSize = 0; int maxSize = 0;
for (int version = MIN_FORMAT_VERSION; for (int version = HFile.MIN_FORMAT_VERSION;
version <= MAX_FORMAT_VERSION; version <= HFile.MAX_FORMAT_VERSION;
++version) ++version)
maxSize = Math.max(getTrailerSize(version), maxSize); maxSize = Math.max(getTrailerSize(version), maxSize);
return maxSize; return maxSize;
@ -158,6 +158,8 @@ public class FixedFileTrailer {
private static final int TRAILER_SIZE[] = computeTrailerSizeByVersion(); private static final int TRAILER_SIZE[] = computeTrailerSizeByVersion();
private static final int MAX_TRAILER_SIZE = getMaxTrailerSize(); private static final int MAX_TRAILER_SIZE = getMaxTrailerSize();
private static final int NOT_PB_SIZE = BlockType.MAGIC_LENGTH + Bytes.SIZEOF_INT;
static int getTrailerSize(int version) { static int getTrailerSize(int version) {
return TRAILER_SIZE[version]; return TRAILER_SIZE[version];
} }
@ -178,44 +180,91 @@ public class FixedFileTrailer {
HFile.checkFormatVersion(majorVersion); HFile.checkFormatVersion(majorVersion);
ByteArrayOutputStream baos = new ByteArrayOutputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutput baosDos = new DataOutputStream(baos); DataOutputStream baosDos = new DataOutputStream(baos);
BlockType.TRAILER.write(baosDos); BlockType.TRAILER.write(baosDos);
baosDos.writeLong(fileInfoOffset); if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) {
baosDos.writeLong(loadOnOpenDataOffset); serializeAsPB(baosDos);
baosDos.writeInt(dataIndexCount);
if (majorVersion == 1) {
// This used to be metaIndexOffset, but it was not used in version 1.
baosDos.writeLong(0);
} else { } else {
baosDos.writeLong(uncompressedDataIndexSize); serializeAsWritable(baosDos);
} }
baosDos.writeInt(metaIndexCount); // The last 4 bytes of the file encode the major and minor version universally
baosDos.writeLong(totalUncompressedBytes);
if (majorVersion == 1) {
baosDos.writeInt((int) Math.min(Integer.MAX_VALUE, entryCount));
} else {
// This field is long from version 2 onwards.
baosDos.writeLong(entryCount);
}
baosDos.writeInt(compressionCodec.ordinal());
if (majorVersion > 1) {
baosDos.writeInt(numDataIndexLevels);
baosDos.writeLong(firstDataBlockOffset);
baosDos.writeLong(lastDataBlockOffset);
Bytes.writeStringFixedSize(baosDos, comparatorClassName,
MAX_COMPARATOR_NAME_LENGTH);
}
// serialize the major and minor versions
baosDos.writeInt(materializeVersion(majorVersion, minorVersion)); baosDos.writeInt(materializeVersion(majorVersion, minorVersion));
outputStream.write(baos.toByteArray()); outputStream.write(baos.toByteArray());
} }
/**
 * Writes the trailer fields as a length-delimited {@code FileTrailerProto}
 * followed by zero padding.
 * <p>
 * The protobuf encoding has a variable length, while the trailer occupies a
 * fixed number of bytes. The encoded message is therefore padded with zero
 * bytes up to {@code getTrailerSize() - NOT_PB_SIZE}, so the bytes written by
 * this method always have the same total length.
 * <p>
 * The size is validated before anything is written, so a failure leaves
 * {@code output} untouched.
 *
 * @param output stream that receives the encoded message and its padding
 * @throws IOException if the encoded message does not fit in the space
 *           reserved for it in the fixed-size trailer, or if writing fails
 */
void serializeAsPB(DataOutputStream output) throws IOException {
  ByteArrayOutputStream encoded = new ByteArrayOutputStream();
  HFileProtos.FileTrailerProto.newBuilder()
      .setFileInfoOffset(fileInfoOffset)
      .setLoadOnOpenDataOffset(loadOnOpenDataOffset)
      .setUncompressedDataIndexSize(uncompressedDataIndexSize)
      .setTotalUncompressedBytes(totalUncompressedBytes)
      .setDataIndexCount(dataIndexCount)
      .setMetaIndexCount(metaIndexCount)
      .setEntryCount(entryCount)
      .setNumDataIndexLevels(numDataIndexLevels)
      .setFirstDataBlockOffset(firstDataBlockOffset)
      .setLastDataBlockOffset(lastDataBlockOffset)
      .setComparatorClassName(comparatorClassName)
      .setCompressionCodec(compressionCodec.ordinal())
      .build().writeDelimitedTo(encoded);

  // Space left for the message once the parts that are not protobuf
  // (NOT_PB_SIZE) are taken out of the fixed trailer size.
  int capacity = getTrailerSize() - NOT_PB_SIZE;
  int padding = capacity - encoded.size();
  if (padding < 0) {
    // Checked before writing so that an oversized trailer never leaves a
    // partial message in the caller's stream.
    throw new IOException("Pbuf encoding size exceeded fixed trailer size limit: "
        + encoded.size() + " > " + capacity);
  }

  // writeTo() streams the internal buffer directly instead of copying it.
  encoded.writeTo(output);
  // Pad to make up the difference between the variable PB encoding length and
  // the fixed length expected by readers; without it the trailer would not be
  // read back properly.
  for (int i = 0; i < padding; i++) {
    output.write(0);
  }
}
/**
 * Writes the trailer fields in the fixed-width Writable layout.
 * <p>
 * The layout depends on {@code majorVersion}: version 1 writes a zero
 * placeholder where later versions store the uncompressed data index size,
 * writes the entry count as an int clamped to {@code Integer.MAX_VALUE}, and
 * stops after the compression codec. Later versions also write the index
 * level count, the first and last data block offsets, and the fixed-size
 * comparator class name.
 *
 * @param output stream that receives the encoded fields
 * @throws IOException if writing to {@code output} fails
 */
void serializeAsWritable(DataOutputStream output) throws IOException {
  final boolean isVersion1 = majorVersion == 1;

  output.writeLong(fileInfoOffset);
  output.writeLong(loadOnOpenDataOffset);
  output.writeInt(dataIndexCount);
  // In version 1 this slot used to be metaIndexOffset, which was not used,
  // so a zero is written in its place.
  output.writeLong(isVersion1 ? 0 : uncompressedDataIndexSize);
  output.writeInt(metaIndexCount);
  output.writeLong(totalUncompressedBytes);
  if (!isVersion1) {
    // This field is long from version 2 onwards.
    output.writeLong(entryCount);
  } else {
    output.writeInt((int) Math.min(Integer.MAX_VALUE, entryCount));
  }
  output.writeInt(compressionCodec.ordinal());

  if (majorVersion <= 1) {
    // Version 1 trailers carry no further fields.
    return;
  }
  output.writeInt(numDataIndexLevels);
  output.writeLong(firstDataBlockOffset);
  output.writeLong(lastDataBlockOffset);
  Bytes.writeStringFixedSize(output, comparatorClassName, MAX_COMPARATOR_NAME_LENGTH);
}
/** /**
* Deserialize the fixed file trailer from the given stream. The version needs * Deserialize the fixed file trailer from the given stream. The version needs
* to already be specified. Make sure this is consistent with * to already be specified. Make sure this is consistent with
@ -229,33 +278,99 @@ public class FixedFileTrailer {
BlockType.TRAILER.readAndCheck(inputStream); BlockType.TRAILER.readAndCheck(inputStream);
fileInfoOffset = inputStream.readLong(); if (majorVersion > 2 || (majorVersion == 2 && minorVersion >= PBUF_TRAILER_MINOR_VERSION)) {
loadOnOpenDataOffset = inputStream.readLong(); deserializeFromPB(inputStream);
dataIndexCount = inputStream.readInt();
if (majorVersion == 1) {
inputStream.readLong(); // Read and skip metaIndexOffset.
} else { } else {
uncompressedDataIndexSize = inputStream.readLong(); deserializeFromWritable(inputStream);
}
metaIndexCount = inputStream.readInt();
totalUncompressedBytes = inputStream.readLong();
entryCount = majorVersion == 1 ? inputStream.readInt() : inputStream.readLong();
compressionCodec = Compression.Algorithm.values()[inputStream.readInt()];
if (majorVersion > 1) {
numDataIndexLevels = inputStream.readInt();
firstDataBlockOffset = inputStream.readLong();
lastDataBlockOffset = inputStream.readLong();
comparatorClassName =
Bytes.readStringFixedSize(inputStream, MAX_COMPARATOR_NAME_LENGTH);
} }
// The last 4 bytes of the file encode the major and minor version universally
int version = inputStream.readInt(); int version = inputStream.readInt();
expectMajorVersion(extractMajorVersion(version)); expectMajorVersion(extractMajorVersion(version));
expectMinorVersion(extractMinorVersion(version)); expectMinorVersion(extractMinorVersion(version));
} }
/**
 * Reads the trailer fields from a length-delimited {@code FileTrailerProto}
 * and consumes the zero padding that follows it.
 * <p>
 * Only fields present in the message overwrite the corresponding members;
 * absent fields keep their current value, except the compression codec, which
 * falls back to {@code Compression.Algorithm.NONE}.
 *
 * @param inputStream stream positioned at the start of the delimited message
 * @throws IOException if the stream ends before the message or its padding
 *           has been fully read, if the message is larger than the space
 *           reserved for it in the fixed-size trailer, or if the stored
 *           compression codec ordinal does not map to a known algorithm
 */
void deserializeFromPB(DataInputStream inputStream) throws IOException {
  // The number of bytes consumed by the parser is measured via available().
  // NOTE(review): this presumes the stream reports its exact remaining byte
  // count (as an in-memory stream does) — confirm against callers.
  int start = inputStream.available();
  HFileProtos.FileTrailerProto.Builder builder = HFileProtos.FileTrailerProto.newBuilder();
  if (!builder.mergeDelimitedFrom(inputStream)) {
    // mergeDelimitedFrom() reports EOF through its return value rather than
    // by throwing; surface it instead of continuing with an empty message.
    throw new IOException("Unexpected end of stream while reading pbuf file trailer");
  }
  int size = start - inputStream.available();

  int padding = getTrailerSize() - NOT_PB_SIZE - size;
  if (padding < 0) {
    throw new IOException("Pbuf encoding size exceeded fixed trailer size limit: read "
        + size + " bytes, limit is " + (getTrailerSize() - NOT_PB_SIZE));
  }
  if (padding > 0) {
    // skip() may legally skip fewer bytes than requested, which would leave
    // the stream mispositioned for the version field that follows.
    // readFully() either consumes all the padding or throws.
    inputStream.readFully(new byte[padding]);
  }

  // process the PB
  if (builder.hasFileInfoOffset()) {
    fileInfoOffset = builder.getFileInfoOffset();
  }
  if (builder.hasLoadOnOpenDataOffset()) {
    loadOnOpenDataOffset = builder.getLoadOnOpenDataOffset();
  }
  if (builder.hasUncompressedDataIndexSize()) {
    uncompressedDataIndexSize = builder.getUncompressedDataIndexSize();
  }
  if (builder.hasTotalUncompressedBytes()) {
    totalUncompressedBytes = builder.getTotalUncompressedBytes();
  }
  if (builder.hasDataIndexCount()) {
    dataIndexCount = builder.getDataIndexCount();
  }
  if (builder.hasMetaIndexCount()) {
    metaIndexCount = builder.getMetaIndexCount();
  }
  if (builder.hasEntryCount()) {
    entryCount = builder.getEntryCount();
  }
  if (builder.hasNumDataIndexLevels()) {
    numDataIndexLevels = builder.getNumDataIndexLevels();
  }
  if (builder.hasFirstDataBlockOffset()) {
    firstDataBlockOffset = builder.getFirstDataBlockOffset();
  }
  if (builder.hasLastDataBlockOffset()) {
    lastDataBlockOffset = builder.getLastDataBlockOffset();
  }
  if (builder.hasComparatorClassName()) {
    comparatorClassName = builder.getComparatorClassName();
  }
  if (builder.hasCompressionCodec()) {
    // The ordinal comes straight from the file, so validate it before using
    // it as an array index.
    Compression.Algorithm[] algorithms = Compression.Algorithm.values();
    int codecOrdinal = builder.getCompressionCodec();
    if (codecOrdinal < 0 || codecOrdinal >= algorithms.length) {
      throw new IOException("Unknown compression codec ordinal in file trailer: "
          + codecOrdinal);
    }
    compressionCodec = algorithms[codecOrdinal];
  } else {
    compressionCodec = Compression.Algorithm.NONE;
  }
}
/**
 * Reads the trailer fields from the fixed-width Writable layout.
 * <p>
 * The layout depends on {@code majorVersion}: version 1 skips the long that
 * later versions use for the uncompressed data index size, reads the entry
 * count as an int, and stops after the compression codec. Later versions also
 * read the index level count, the first and last data block offsets, and the
 * fixed-size comparator class name.
 *
 * @param input source positioned at the first trailer field
 * @throws IOException if reading fails or the stored compression codec
 *           ordinal does not map to a known algorithm
 */
void deserializeFromWritable(DataInput input) throws IOException {
  fileInfoOffset = input.readLong();
  loadOnOpenDataOffset = input.readLong();
  dataIndexCount = input.readInt();
  if (majorVersion == 1) {
    input.readLong(); // Read and skip metaIndexOffset.
  } else {
    uncompressedDataIndexSize = input.readLong();
  }
  metaIndexCount = input.readInt();
  totalUncompressedBytes = input.readLong();
  entryCount = majorVersion == 1 ? input.readInt() : input.readLong();

  // The ordinal comes straight from the file, so validate it before using it
  // as an array index; a corrupt trailer then fails with an IOException
  // rather than an ArrayIndexOutOfBoundsException.
  Compression.Algorithm[] algorithms = Compression.Algorithm.values();
  int codecOrdinal = input.readInt();
  if (codecOrdinal < 0 || codecOrdinal >= algorithms.length) {
    throw new IOException("Unknown compression codec ordinal in file trailer: "
        + codecOrdinal);
  }
  compressionCodec = algorithms[codecOrdinal];

  if (majorVersion > 1) {
    numDataIndexLevels = input.readInt();
    firstDataBlockOffset = input.readLong();
    lastDataBlockOffset = input.readLong();
    comparatorClassName = Bytes.readStringFixedSize(input, MAX_COMPARATOR_NAME_LENGTH);
  }
}
private void append(StringBuilder sb, String s) { private void append(StringBuilder sb, String s) {
if (sb.length() > 0) if (sb.length() > 0)
sb.append(", "); sb.append(", ");
@ -449,6 +564,10 @@ public class FixedFileTrailer {
this.firstDataBlockOffset = firstDataBlockOffset; this.firstDataBlockOffset = firstDataBlockOffset;
} }
/**
 * Returns the comparator class name currently held by this trailer.
 *
 * @return the value of the {@code comparatorClassName} field
 */
public String getComparatorClassName() {
  return this.comparatorClassName;
}
/** /**
* Returns the major version of this HFile format * Returns the major version of this HFile format
*/ */

View File

@ -77,8 +77,8 @@ public class HFileReaderV2 extends AbstractHFileReader {
static final int MIN_MINOR_VERSION = 0; static final int MIN_MINOR_VERSION = 0;
/** Maximum minor version supported by this HFile format */ /** Maximum minor version supported by this HFile format */
// We went to version 2 when we moved to pb'ing the fileinfo trailer on the file. This version can read Writables // We went to version 2 when we moved to pb'ing fileinfo and the trailer on
// version 1 too. // the file. This version can read Writables version 1.
static final int MAX_MINOR_VERSION = 2; static final int MAX_MINOR_VERSION = 2;
/** /**