svn merge -c 1376928 from trunk for HDFS-3177. Update DFSClient and DataXceiver to handle different checksum types in file checksum computation.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1376937 13f79535-47bb-0310-9956-ffa450edef68
parent 6175155cf4
commit a26cfc347f
CHANGES.txt
@@ -227,6 +227,9 @@ Release 2.0.1-alpha - UNRELEASED
     HDFS-3819. Should check whether invalidate work percentage default value is
     not greater than 1.0f. (Jing Zhao via jitendra)
 
+    HDFS-3177. Update DFSClient and DataXceiver to handle different checksum
+    types in file checksum computation. (Kihwal Lee via szetszwo)
+
   OPTIMIZATIONS
 
     HDFS-2982. Startup performance suffers when there are many edit log
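For context, a minimal sketch of how a client sees the new behavior; the path used here is illustrative, not from the patch. After this change, the concrete class of the checksum returned by getFileChecksum() reflects the CRC type of the file's blocks:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileChecksumExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // MD5MD5CRC32GzipFileChecksum for CRC32 blocks,
    // MD5MD5CRC32CastagnoliFileChecksum for CRC32C blocks.
    FileChecksum sum = fs.getFileChecksum(new Path("/tmp/example"));
    System.out.println(sum.getAlgorithmName() + ": " + sum);
  }
}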
DFSClient.java
@@ -91,7 +91,9 @@ import org.apache.hadoop.fs.FsServerDefaults;
 import org.apache.hadoop.fs.FsStatus;
 import org.apache.hadoop.fs.HdfsBlockLocation;
 import org.apache.hadoop.fs.InvalidPathException;
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
 import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
 import org.apache.hadoop.fs.Options;
 import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.ParentNotDirectoryException;
@@ -1641,7 +1643,8 @@ public class DFSClient implements java.io.Closeable {
     }
     List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
     final DataOutputBuffer md5out = new DataOutputBuffer();
-    int bytesPerCRC = 0;
+    int bytesPerCRC = -1;
+    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
     long crcPerBlock = 0;
     boolean refetchBlocks = false;
     int lastRetriedIndex = -1;
@@ -1745,6 +1748,17 @@ public class DFSClient implements java.io.Closeable {
             checksumData.getMd5().toByteArray());
         md5.write(md5out);
 
+        // read crc-type
+        final DataChecksum.Type ct = HdfsProtoUtil.
+            fromProto(checksumData.getCrcType());
+        if (i == 0) { // first block
+          crcType = ct;
+        } else if (crcType != DataChecksum.Type.MIXED
+            && crcType != ct) {
+          // if crc types are mixed in a file
+          crcType = DataChecksum.Type.MIXED;
+        }
+
         done = true;
 
         if (LOG.isDebugEnabled()) {
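The loop above folds the per-block checksum types into one file-level type: the first block sets the type, and any later disagreement collapses it to MIXED. A self-contained sketch of that reduction, using a toy enum as a stand-in for DataChecksum.Type (which defines the same MIXED member):

import java.util.List;

enum CrcType { CRC32, CRC32C, MIXED }  // toy stand-in for DataChecksum.Type

class CrcTypeReduction {
  // Returns the common type of all blocks, or MIXED on any disagreement.
  static CrcType fileCrcType(List<CrcType> blockTypes) {
    CrcType crcType = null;
    for (CrcType ct : blockTypes) {
      if (crcType == null) {        // first block
        crcType = ct;
      } else if (crcType != CrcType.MIXED && crcType != ct) {
        crcType = CrcType.MIXED;    // crc types are mixed in this file
      }
    }
    return crcType;
  }
}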
@@ -1770,7 +1784,18 @@ public class DFSClient implements java.io.Closeable {
 
     //compute file MD5
     final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
-    return new MD5MD5CRC32FileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
+    switch (crcType) {
+      case CRC32:
+        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC,
+            crcPerBlock, fileMD5);
+      case CRC32C:
+        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC,
+            crcPerBlock, fileMD5);
+      default:
+        // we should never get here since the validity was checked
+        // when getCrcType() was called above.
+        return null;
+    }
   }
 
   /**
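Callers that receive the result as a plain FileChecksum can recover the CRC variant by dispatching on the concrete class; a sketch using only the two subclasses imported by this patch:

import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;

class ChecksumKind {
  static String kindOf(FileChecksum sum) {
    if (sum instanceof MD5MD5CRC32GzipFileChecksum) {
      return "CRC32 (gzip polynomial)";
    } else if (sum instanceof MD5MD5CRC32CastagnoliFileChecksum) {
      return "CRC32C (Castagnoli polynomial)";
    }
    return "unknown or pre-HDFS-3177 checksum";
  }
}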
HdfsProtoUtil.java
@@ -29,6 +29,7 @@ import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos;
 import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
 import org.apache.hadoop.hdfs.util.ExactSizeInputStream;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.security.token.Token;
 
 import com.google.common.collect.Lists;
@@ -155,6 +156,14 @@ public abstract class HdfsProtoUtil {
     return ret;
   }
 
+  public static DataChecksum.Type fromProto(HdfsProtos.ChecksumTypeProto type) {
+    return DataChecksum.Type.valueOf(type.name());
+  }
+
+  public static HdfsProtos.ChecksumTypeProto toProto(DataChecksum.Type type) {
+    return HdfsProtos.ChecksumTypeProto.valueOf(type.name());
+  }
+
   public static InputStream vintPrefixed(final InputStream input)
   throws IOException {
     final int firstByte = input.read();
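Both converters depend on the Java and protobuf enums keeping identical member names, so Enum.valueOf(name()) maps between them. A self-contained illustration with toy stand-ins for DataChecksum.Type and HdfsProtos.ChecksumTypeProto; note that valueOf throws IllegalArgumentException for a member with no counterpart on the other side (e.g. a Java-only MIXED), so only wire-representable types should be passed:

enum JavaCrcType { NULL, CRC32, CRC32C }   // stand-in for DataChecksum.Type
enum ProtoCrcType { NULL, CRC32, CRC32C }  // stand-in for ChecksumTypeProto

class NameRoundTrip {
  static ProtoCrcType toProto(JavaCrcType t) {
    return ProtoCrcType.valueOf(t.name());  // match by member name
  }
  static JavaCrcType fromProto(ProtoCrcType t) {
    return JavaCrcType.valueOf(t.name());
  }
  public static void main(String[] args) {
    // Round-trips as long as the two enums stay in sync.
    assert fromProto(toProto(JavaCrcType.CRC32C)) == JavaCrcType.CRC32C;
  }
}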
@@ -167,4 +176,4 @@ public abstract class HdfsProtoUtil {
 
     return new ExactSizeInputStream(input, size);
   }
 }
DataXceiver.java
@@ -609,6 +609,7 @@ class DataXceiver extends Receiver implements Runnable {
         .setBytesPerCrc(bytesPerCRC)
         .setCrcPerBlock(crcPerBlock)
         .setMd5(ByteString.copyFrom(md5.getDigest()))
+        .setCrcType(HdfsProtoUtil.toProto(checksum.getChecksumType()))
         )
       .build()
       .writeDelimitedTo(out);
datatransfer.proto
@@ -185,4 +185,5 @@ message OpBlockChecksumResponseProto {
   required uint32 bytesPerCrc = 1;
   required uint64 crcPerBlock = 2;
   required bytes md5 = 3;
+  optional ChecksumTypeProto crcType = 4 [default = CRC32];
 }
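Declaring the new field optional with [default = CRC32] keeps the response wire-compatible: a reply from a datanode that predates this patch simply omits the field, and the generated accessor falls back to the default. A sketch of the reader side, assuming the standard protobuf-generated classes for this message:

import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ChecksumTypeProto;

class CrcTypeDefault {
  static ChecksumTypeProto crcTypeOf(byte[] wireBytes) throws Exception {
    OpBlockChecksumResponseProto resp =
        OpBlockChecksumResponseProto.parseFrom(wireBytes);
    // For a response from an old datanode, hasCrcType() is false and
    // getCrcType() returns the declared default, CRC32.
    return resp.getCrcType();
  }
}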
TestDistributedFileSystem.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
 import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.VolumeId;
@@ -708,9 +709,16 @@ public class TestDistributedFileSystem {
       out2.close();
 
       // the two checksums must be different.
-      FileChecksum sum1 = dfs.getFileChecksum(path1);
-      FileChecksum sum2 = dfs.getFileChecksum(path2);
+      MD5MD5CRC32FileChecksum sum1 =
+          (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path1);
+      MD5MD5CRC32FileChecksum sum2 =
+          (MD5MD5CRC32FileChecksum)dfs.getFileChecksum(path2);
       assertFalse(sum1.equals(sum2));
+
+      // check the individual params
+      assertEquals(DataChecksum.Type.CRC32C, sum1.getCrcType());
+      assertEquals(DataChecksum.Type.CRC32, sum2.getCrcType());
+
     } finally {
       if (cluster != null) {
         cluster.getFileSystem().delete(testBasePath, true);
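For reference, the two files the test compares can be written with different checksum types through the ChecksumOpt imported above; a hedged sketch of one way to do that (the exact create() overload and the buffer, replication, and block-size values here are assumptions, not taken from the patch):

import java.util.EnumSet;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Options.ChecksumOpt;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.DataChecksum;

class CreateWithChecksumType {
  static FSDataOutputStream createCrc32c(FileSystem fs, Path p)
      throws Exception {
    // ChecksumOpt pairs a checksum type with bytes-per-checksum.
    ChecksumOpt opt = new ChecksumOpt(DataChecksum.Type.CRC32C, 512);
    return fs.create(p, FsPermission.getDefault(),
        EnumSet.of(CreateFlag.CREATE), 4096, (short) 1,
        128 * 1024 * 1024, null, opt);
  }
}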