HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum with CRC32C. Contributed by Kihwal Lee
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1375450 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a6262430ff
commit
f2dd818201
|
@ -304,6 +304,9 @@ Branch-2 ( Unreleased changes )
|
|||
|
||||
HADOOP-8686. Fix warnings in native code. (Colin Patrick McCabe via eli)
|
||||
|
||||
HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file
|
||||
checksum with CRC32C. (Kihwal Lee via szetszwo)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import org.apache.hadoop.io.MD5Hash;
|
||||
import org.apache.hadoop.util.DataChecksum;
|
||||
|
||||
/** For CRC32 with the Castagnoli polynomial */
|
||||
public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum {
|
||||
/** Same as this(0, 0, null) */
|
||||
public MD5MD5CRC32CastagnoliFileChecksum() {
|
||||
this(0, 0, null);
|
||||
}
|
||||
|
||||
/** Create a MD5FileChecksum */
|
||||
public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
|
||||
super(bytesPerCRC, crcPerBlock, md5);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DataChecksum.Type getCrcType() {
|
||||
// default to the one that is understood by all releases.
|
||||
return DataChecksum.Type.CRC32C;
|
||||
}
|
||||
}
|
|
@ -23,12 +23,17 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.fs.Options.ChecksumOpt;
|
||||
import org.apache.hadoop.io.MD5Hash;
|
||||
import org.apache.hadoop.io.WritableUtils;
|
||||
import org.apache.hadoop.util.DataChecksum;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.znerd.xmlenc.XMLOutputter;
|
||||
|
||||
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
|
||||
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
|
||||
|
||||
/** MD5 of MD5 of CRC32. */
|
||||
@InterfaceAudience.LimitedPrivate({"HDFS"})
|
||||
@InterfaceStability.Unstable
|
||||
|
@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|||
|
||||
/** {@inheritDoc} */
|
||||
public String getAlgorithmName() {
|
||||
return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
|
||||
return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
|
||||
getCrcType().name();
|
||||
}
|
||||
|
||||
public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm)
|
||||
throws IOException {
|
||||
if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) {
|
||||
return DataChecksum.Type.CRC32;
|
||||
} else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) {
|
||||
return DataChecksum.Type.CRC32C;
|
||||
}
|
||||
|
||||
throw new IOException("Unknown checksum type in " + algorithm);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
|
@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|||
return WritableUtils.toByteArray(this);
|
||||
}
|
||||
|
||||
/** returns the CRC type */
|
||||
public DataChecksum.Type getCrcType() {
|
||||
// default to the one that is understood by all releases.
|
||||
return DataChecksum.Type.CRC32;
|
||||
}
|
||||
|
||||
public ChecksumOpt getChecksumOpt() {
|
||||
return new ChecksumOpt(getCrcType(), bytesPerCRC);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void readFields(DataInput in) throws IOException {
|
||||
bytesPerCRC = in.readInt();
|
||||
|
@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|||
if (that != null) {
|
||||
xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
|
||||
xml.attribute("crcPerBlock", "" + that.crcPerBlock);
|
||||
xml.attribute("crcType", ""+ that.getCrcType().name());
|
||||
xml.attribute("md5", "" + that.md5);
|
||||
}
|
||||
xml.endTag();
|
||||
|
@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|||
final String bytesPerCRC = attrs.getValue("bytesPerCRC");
|
||||
final String crcPerBlock = attrs.getValue("crcPerBlock");
|
||||
final String md5 = attrs.getValue("md5");
|
||||
String crcType = attrs.getValue("crcType");
|
||||
DataChecksum.Type finalCrcType;
|
||||
if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
|
||||
Integer.valueOf(crcPerBlock), new MD5Hash(md5));
|
||||
} catch(Exception e) {
|
||||
// old versions don't support crcType.
|
||||
if (crcType == null || crcType == "") {
|
||||
finalCrcType = DataChecksum.Type.CRC32;
|
||||
} else {
|
||||
finalCrcType = DataChecksum.Type.valueOf(crcType);
|
||||
}
|
||||
|
||||
switch (finalCrcType) {
|
||||
case CRC32:
|
||||
return new MD5MD5CRC32GzipFileChecksum(
|
||||
Integer.valueOf(bytesPerCRC),
|
||||
Integer.valueOf(crcPerBlock),
|
||||
new MD5Hash(md5));
|
||||
case CRC32C:
|
||||
return new MD5MD5CRC32CastagnoliFileChecksum(
|
||||
Integer.valueOf(bytesPerCRC),
|
||||
Integer.valueOf(crcPerBlock),
|
||||
new MD5Hash(md5));
|
||||
default:
|
||||
// we should never get here since finalCrcType will
|
||||
// hold a valid type or we should have got an exception.
|
||||
return null;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
|
||||
+ ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
|
||||
+ ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType
|
||||
+ ", md5=" + md5, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
|||
public String toString() {
|
||||
return getAlgorithmName() + ":" + md5;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs;
|
||||
|
||||
import org.apache.hadoop.io.MD5Hash;
|
||||
import org.apache.hadoop.util.DataChecksum;
|
||||
|
||||
/** For CRC32 with the Gzip polynomial */
|
||||
public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum {
|
||||
/** Same as this(0, 0, null) */
|
||||
public MD5MD5CRC32GzipFileChecksum() {
|
||||
this(0, 0, null);
|
||||
}
|
||||
|
||||
/** Create a MD5FileChecksum */
|
||||
public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
|
||||
super(bytesPerCRC, crcPerBlock, md5);
|
||||
}
|
||||
@Override
|
||||
public DataChecksum.Type getCrcType() {
|
||||
// default to the one that is understood by all releases.
|
||||
return DataChecksum.Type.CRC32;
|
||||
}
|
||||
}
|
|
@ -43,14 +43,16 @@ public class DataChecksum implements Checksum {
|
|||
public static final int CHECKSUM_NULL = 0;
|
||||
public static final int CHECKSUM_CRC32 = 1;
|
||||
public static final int CHECKSUM_CRC32C = 2;
|
||||
public static final int CHECKSUM_DEFAULT = 3;
|
||||
public static final int CHECKSUM_DEFAULT = 3;
|
||||
public static final int CHECKSUM_MIXED = 4;
|
||||
|
||||
/** The checksum types */
|
||||
public static enum Type {
|
||||
NULL (CHECKSUM_NULL, 0),
|
||||
CRC32 (CHECKSUM_CRC32, 4),
|
||||
CRC32C(CHECKSUM_CRC32C, 4),
|
||||
DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum
|
||||
DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
|
||||
MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
|
||||
|
||||
public final int id;
|
||||
public final int size;
|
||||
|
|
|
@ -29,6 +29,8 @@ import java.util.TreeMap;
|
|||
import org.apache.hadoop.fs.ContentSummary;
|
||||
import org.apache.hadoop.fs.FileChecksum;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
|
||||
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
|
||||
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.hdfs.DFSUtil;
|
||||
|
@ -43,6 +45,7 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifie
|
|||
import org.apache.hadoop.ipc.RemoteException;
|
||||
import org.apache.hadoop.security.token.Token;
|
||||
import org.apache.hadoop.security.token.TokenIdentifier;
|
||||
import org.apache.hadoop.util.DataChecksum;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.mortbay.util.ajax.JSON;
|
||||
|
||||
|
@ -512,7 +515,21 @@ public class JsonUtil {
|
|||
final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes"));
|
||||
|
||||
final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
|
||||
final MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum();
|
||||
final DataChecksum.Type crcType =
|
||||
MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm);
|
||||
final MD5MD5CRC32FileChecksum checksum;
|
||||
|
||||
// Recreate what DFSClient would have returned.
|
||||
switch(crcType) {
|
||||
case CRC32:
|
||||
checksum = new MD5MD5CRC32GzipFileChecksum();
|
||||
break;
|
||||
case CRC32C:
|
||||
checksum = new MD5MD5CRC32CastagnoliFileChecksum();
|
||||
break;
|
||||
default:
|
||||
throw new IOException("Unknown algorithm: " + algorithm);
|
||||
}
|
||||
checksum.readFields(in);
|
||||
|
||||
//check algorithm name
|
||||
|
|
Loading…
Reference in New Issue