svn merge -c 1376928 from trunk for HDFS-3177. Update DFSClient and DataXceiver to handle different checksum types in file checksum computation.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1376937 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 2012-08-24 14:24:59 +00:00
parent 6175155cf4
commit a26cfc347f
6 changed files with 52 additions and 5 deletions

CHANGES.txt (hadoop-hdfs)

@@ -227,6 +227,9 @@ Release 2.0.1-alpha - UNRELEASED
     HDFS-3819. Should check whether invalidate work percentage default value is
     not greater than 1.0f. (Jing Zhao via jitendra)
 
+    HDFS-3177. Update DFSClient and DataXceiver to handle different checksum
+    types in file checksum computation. (Kihwal Lee via szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-2982. Startup performance suffers when there are many edit log

DFSClient.java

@@ -91,7 +91,9 @@ import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.FsStatus;
 import org.apache.hadoop.fs.HdfsBlockLocation;
 import org.apache.hadoop.fs.InvalidPathException;
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
 import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
 import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
@@ -1641,7 +1643,8 @@ public class DFSClient implements java.io.Closeable {
     }
     List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
     final DataOutputBuffer md5out = new DataOutputBuffer();
-    int bytesPerCRC = 0;
+    int bytesPerCRC = -1;
+    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
     long crcPerBlock = 0;
     boolean refetchBlocks = false;
     int lastRetriedIndex = -1;
@@ -1745,6 +1748,17 @@
               checksumData.getMd5().toByteArray());
           md5.write(md5out);
+
+          // read crc-type
+          final DataChecksum.Type ct = HdfsProtoUtil.
+              fromProto(checksumData.getCrcType());
+          if (i == 0) { // first block
+            crcType = ct;
+          } else if (crcType != DataChecksum.Type.MIXED
+              && crcType != ct) {
+            // if crc types are mixed in a file
+            crcType = DataChecksum.Type.MIXED;
+          }
 
           done = true;
 
           if (LOG.isDebugEnabled()) {
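
Read on its own, the added loop body is a simple fold over the per-block checksum types: the first block seeds the file-level type, and any later block that disagrees collapses it to MIXED, which then sticks for the rest of the file. A standalone sketch of that rule, assuming only hadoop-common on the classpath (the class and method names below are illustrative, not part of the patch):

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.util.DataChecksum;

// Illustrative only: folds per-block checksum types into one file-level type
// the same way the DFSClient loop above does.
public class ChecksumTypeFolding {
  static DataChecksum.Type fileLevelType(List<DataChecksum.Type> blockTypes) {
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    for (int i = 0; i < blockTypes.size(); i++) {
      DataChecksum.Type ct = blockTypes.get(i);
      if (i == 0) {
        crcType = ct;                          // first block seeds the type
      } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
        crcType = DataChecksum.Type.MIXED;     // any disagreement => MIXED, permanently
      }
    }
    return crcType;
  }

  public static void main(String[] args) {
    System.out.println(fileLevelType(Arrays.asList(
        DataChecksum.Type.CRC32C, DataChecksum.Type.CRC32C)));  // CRC32C
    System.out.println(fileLevelType(Arrays.asList(
        DataChecksum.Type.CRC32C, DataChecksum.Type.CRC32)));   // MIXED
  }
}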
@@ -1770,7 +1784,18 @@
     //compute file MD5
     final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
-    return new MD5MD5CRC32FileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
+    switch (crcType) {
+      case CRC32:
+        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC,
+            crcPerBlock, fileMD5);
+      case CRC32C:
+        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC,
+            crcPerBlock, fileMD5);
+      default:
+        // we should never get here since the validity was checked
+        // when getCrcType() was called above.
+        return null;
+    }
   }
 
   /**
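
With this switch in place, DistributedFileSystem.getFileChecksum hands back the concrete subclass that matches how the file's blocks were checksummed: MD5MD5CRC32GzipFileChecksum for CRC32 and MD5MD5CRC32CastagnoliFileChecksum for CRC32C. A hedged usage sketch against a running cluster (the path is made up; the test at the end of this commit exercises the same API with assertions):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.DataChecksum;

// Sketch only: assumes HDFS is reachable through the default Configuration
// and that the path below exists.
public class ChecksumTypeProbe {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    FileChecksum sum = fs.getFileChecksum(new Path("/user/foo/data"));
    if (sum instanceof MD5MD5CRC32FileChecksum) {
      // CRC32 (gzip polynomial) or CRC32C (Castagnoli), depending on how
      // the file was written.
      DataChecksum.Type type = ((MD5MD5CRC32FileChecksum) sum).getCrcType();
      System.out.println(sum.getAlgorithmName() + " crcType=" + type);
    }
  }
}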

HdfsProtoUtil.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.util.ExactSizeInputStream;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.security.token.Token;
 
 import com.google.common.collect.Lists;
@@ -155,6 +156,14 @@ public abstract class HdfsProtoUtil {
     return ret;
   }
 
+  public static DataChecksum.Type fromProto(HdfsProtos.ChecksumTypeProto type) {
+    return DataChecksum.Type.valueOf(type.name());
+  }
+
+  public static HdfsProtos.ChecksumTypeProto toProto(DataChecksum.Type type) {
+    return HdfsProtos.ChecksumTypeProto.valueOf(type.name());
+  }
+
   public static InputStream vintPrefixed(final InputStream input)
   throws IOException {
     final int firstByte = input.read();
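
fromProto and toProto work only because DataChecksum.Type and HdfsProtos.ChecksumTypeProto deliberately share constant names, so each direction is a plain Enum.valueOf lookup. A self-contained illustration of that pattern with stand-in enums (not the real Hadoop or protobuf-generated types); the caveat is that valueOf throws IllegalArgumentException when the target enum has no constant of that name, so only wire-representable values should reach toProto:

// Stand-in enums; the real mapping is between HdfsProtos.ChecksumTypeProto
// and DataChecksum.Type.
enum WireChecksumType { NULL, CRC32, CRC32C }
enum LocalChecksumType { NULL, CRC32, CRC32C, DEFAULT, MIXED }

public class EnumNameMapping {
  static LocalChecksumType fromWire(WireChecksumType t) {
    return LocalChecksumType.valueOf(t.name());   // same-name lookup
  }

  static WireChecksumType toWire(LocalChecksumType t) {
    return WireChecksumType.valueOf(t.name());    // throws for DEFAULT / MIXED
  }

  public static void main(String[] args) {
    System.out.println(fromWire(WireChecksumType.CRC32C));  // CRC32C
    System.out.println(toWire(LocalChecksumType.CRC32));    // CRC32
    try {
      toWire(LocalChecksumType.MIXED);
    } catch (IllegalArgumentException expected) {
      System.out.println("MIXED has no wire constant");
    }
  }
}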

DataXceiver.java

@@ -609,6 +609,7 @@ class DataXceiver extends Receiver implements Runnable {
         .setBytesPerCrc(bytesPerCRC)
         .setCrcPerBlock(crcPerBlock)
         .setMd5(ByteString.copyFrom(md5.getDigest()))
+        .setCrcType(HdfsProtoUtil.toProto(checksum.getChecksumType()))
         )
       .build()
       .writeDelimitedTo(out);

datatransfer.proto

@@ -185,4 +185,5 @@ message OpBlockChecksumResponseProto {
   required uint32 bytesPerCrc = 1;
   required uint64 crcPerBlock = 2;
   required bytes md5 = 3;
+  optional ChecksumTypeProto crcType = 4 [default = CRC32];
 }
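
Making crcType optional with [default = CRC32] keeps the response wire-compatible: a DataNode built before this change never writes field 4, and on the client the generated getter simply falls back to CRC32. A small sketch of that protobuf behavior, assuming the generated classes live in DataTransferProtos and HdfsProtos as they do elsewhere in this patch:

import com.google.protobuf.ByteString;

import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ChecksumTypeProto;

// Build a response the way an old DataNode would, i.e. without crcType,
// then observe the declared default on the reading side.
public class CrcTypeDefaultDemo {
  public static void main(String[] args) {
    OpBlockChecksumResponseProto reply = OpBlockChecksumResponseProto.newBuilder()
        .setBytesPerCrc(512)                        // required fields must be set
        .setCrcPerBlock(1L)
        .setMd5(ByteString.copyFrom(new byte[16]))
        .build();                                   // crcType deliberately omitted

    System.out.println(reply.hasCrcType());                             // false
    System.out.println(reply.getCrcType() == ChecksumTypeProto.CRC32);  // true, the default
  }
}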

TestDistributedFileSystem.java

@@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
 import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.VolumeId;
@@ -708,9 +709,16 @@ public class TestDistributedFileSystem {
       out2.close();
 
       // the two checksums must be different.
-      FileChecksum sum1 = dfs.getFileChecksum(path1);
-      FileChecksum sum2 = dfs.getFileChecksum(path2);
+      MD5MD5CRC32FileChecksum sum1 =
+          (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
+      MD5MD5CRC32FileChecksum sum2 =
+          (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
       assertFalse(sum1.equals(sum2));
+
+      // check the individual params
+      assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
+      assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
     } finally {
       if (cluster != null) {
         cluster.getFileSystem().delete(testBasePath, true);