HDFS-3177. Update DFSClient and DataXceiver to handle different checkum types in file checksum computation. Contributed by Kihwal Lee

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1376928 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2012-08-24 14:15:14 +00:00
parent bbf1f55bee
commit c46de830da
6 changed files with 52 additions and 5 deletions

View File

@ -421,6 +421,9 @@ Branch-2 ( Unreleased changes )
HDFS-3832. Remove protocol methods related to DistributedUpgrade. (suresh)
HDFS-3177. Update DFSClient and DataXceiver to handle different checkum
types in file checksum computation. (Kihwal Lee via szetszwo)
OPTIMIZATIONS
HDFS-2982. Startup performance suffers when there are many edit log

View File

@ -89,7 +89,9 @@
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.HdfsBlockLocation;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Options.ChecksumOpt;
import org.apache.hadoop.fs.ParentNotDirectoryException;
@ -1603,7 +1605,8 @@ public static MD5MD5CRC32FileChecksum getFileChecksum(String src,
}
List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
final DataOutputBuffer md5out = new DataOutputBuffer();
int bytesPerCRC = 0;
int bytesPerCRC = -1;
DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
long crcPerBlock = 0;
boolean refetchBlocks = false;
int lastRetriedIndex = -1;
@ -1707,6 +1710,17 @@ else if (bpc != bytesPerCRC) {
checksumData.getMd5().toByteArray());
md5.write(md5out);
// read crc-type
final DataChecksum.Type ct = HdfsProtoUtil.
fromProto(checksumData.getCrcType());
if (i == 0) { // first block
crcType = ct;
} else if (crcType != DataChecksum.Type.MIXED
&& crcType != ct) {
// if crc types are mixed in a file
crcType = DataChecksum.Type.MIXED;
}
done = true;
if (LOG.isDebugEnabled()) {
@ -1732,7 +1746,18 @@ else if (bpc != bytesPerCRC) {
//compute file MD5
final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
return new MD5MD5CRC32FileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
switch (crcType) {
case CRC32:
return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC,
crcPerBlock, fileMD5);
case CRC32C:
return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC,
crcPerBlock, fileMD5);
default:
// we should never get here since the validity was checked
// when getCrcType() was called above.
return null;
}
}
/**

View File

@ -29,6 +29,7 @@
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.util.ExactSizeInputStream;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.security.token.Token;
import com.google.common.collect.Lists;
@ -155,6 +156,14 @@ public static DatanodeInfo[] fromProtos(
return ret;
}
public static DataChecksum.Type fromProto(HdfsProtos.ChecksumTypeProto type) {
return DataChecksum.Type.valueOf(type.name());
}
public static HdfsProtos.ChecksumTypeProto toProto(DataChecksum.Type type) {
return HdfsProtos.ChecksumTypeProto.valueOf(type.name());
}
public static InputStream vintPrefixed(final InputStream input)
throws IOException {
final int firstByte = input.read();
@ -167,4 +176,4 @@ public static InputStream vintPrefixed(final InputStream input)
return new ExactSizeInputStream(input, size);
}
}
}

View File

@ -609,6 +609,7 @@ public void blockChecksum(final ExtendedBlock block,
.setBytesPerCrc(bytesPerCRC)
.setCrcPerBlock(crcPerBlock)
.setMd5(ByteString.copyFrom(md5.getDigest()))
.setCrcType(HdfsProtoUtil.toProto(checksum.getChecksumType()))
)
.build()
.writeDelimitedTo(out);

View File

@ -185,4 +185,5 @@ message OpBlockChecksumResponseProto {
required uint32 bytesPerCrc = 1;
required uint64 crcPerBlock = 2;
required bytes md5 = 3;
optional ChecksumTypeProto crcType = 4 [default = CRC32];
}

View File

@ -43,6 +43,7 @@
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Options.ChecksumOpt;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.VolumeId;
@ -708,9 +709,16 @@ public void testCreateWithCustomChecksum() throws Exception {
out2.close();
// the two checksums must be different.
FileChecksum sum1 = dfs.getFileChecksum(path1);
FileChecksum sum2 = dfs.getFileChecksum(path2);
MD5MD5CRC32FileChecksum sum1 =
(MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
MD5MD5CRC32FileChecksum sum2 =
(MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
assertFalse(sum1.equals(sum2));
// check the individual params
assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
} finally {
if (cluster != null) {
cluster.getFileSystem().delete(testBasePath, true);