HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum with CRC32C. Contributed by Kihwal Lee

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1375450 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2012-08-21 09:44:46 +00:00
parent a6262430ff
commit f2dd818201
6 changed files with 164 additions and 9 deletions

View File

@ -304,6 +304,9 @@ Branch-2 ( Unreleased changes )
HADOOP-8686. Fix warnings in native code. (Colin Patrick McCabe via eli)
HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file
checksum with CRC32C. (Kihwal Lee via szetszwo)
BUG FIXES
HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.DataChecksum;
/** For CRC32 with the Castagnoli polynomial */
public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum {
/** Same as this(0, 0, null) */
public MD5MD5CRC32CastagnoliFileChecksum() {
this(0, 0, null);
}
/** Create a MD5FileChecksum */
public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
super(bytesPerCRC, crcPerBlock, md5);
}
@Override
public DataChecksum.Type getCrcType() {
// default to the one that is understood by all releases.
return DataChecksum.Type.CRC32C;
}
}

View File

@ -23,12 +23,17 @@ import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Options.ChecksumOpt;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.util.DataChecksum;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.znerd.xmlenc.XMLOutputter;
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
/** MD5 of MD5 of CRC32. */
@InterfaceAudience.LimitedPrivate({"HDFS"})
@InterfaceStability.Unstable
@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
/** {@inheritDoc} */
public String getAlgorithmName() {
return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
getCrcType().name();
}
public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm)
throws IOException {
if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) {
return DataChecksum.Type.CRC32;
} else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) {
return DataChecksum.Type.CRC32C;
}
throw new IOException("Unknown checksum type in " + algorithm);
}
/** {@inheritDoc} */
@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
return WritableUtils.toByteArray(this);
}
/** returns the CRC type */
public DataChecksum.Type getCrcType() {
// default to the one that is understood by all releases.
return DataChecksum.Type.CRC32;
}
public ChecksumOpt getChecksumOpt() {
return new ChecksumOpt(getCrcType(), bytesPerCRC);
}
/** {@inheritDoc} */
public void readFields(DataInput in) throws IOException {
bytesPerCRC = in.readInt();
@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
if (that != null) {
xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
xml.attribute("crcPerBlock", "" + that.crcPerBlock);
xml.attribute("crcType", ""+ that.getCrcType().name());
xml.attribute("md5", "" + that.md5);
}
xml.endTag();
@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
final String bytesPerCRC = attrs.getValue("bytesPerCRC");
final String crcPerBlock = attrs.getValue("crcPerBlock");
final String md5 = attrs.getValue("md5");
String crcType = attrs.getValue("crcType");
DataChecksum.Type finalCrcType;
if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
return null;
}
try {
return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
Integer.valueOf(crcPerBlock), new MD5Hash(md5));
} catch(Exception e) {
// old versions don't support crcType.
if (crcType == null || crcType == "") {
finalCrcType = DataChecksum.Type.CRC32;
} else {
finalCrcType = DataChecksum.Type.valueOf(crcType);
}
switch (finalCrcType) {
case CRC32:
return new MD5MD5CRC32GzipFileChecksum(
Integer.valueOf(bytesPerCRC),
Integer.valueOf(crcPerBlock),
new MD5Hash(md5));
case CRC32C:
return new MD5MD5CRC32CastagnoliFileChecksum(
Integer.valueOf(bytesPerCRC),
Integer.valueOf(crcPerBlock),
new MD5Hash(md5));
default:
// we should never get here since finalCrcType will
// hold a valid type or we should have got an exception.
return null;
}
} catch (Exception e) {
throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
+ ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
+ ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType
+ ", md5=" + md5, e);
}
}
@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
public String toString() {
return getAlgorithmName() + ":" + md5;
}
}
}

View File

@ -0,0 +1,40 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.DataChecksum;
/** For CRC32 with the Gzip polynomial */
public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum {
/** Same as this(0, 0, null) */
public MD5MD5CRC32GzipFileChecksum() {
this(0, 0, null);
}
/** Create a MD5FileChecksum */
public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
super(bytesPerCRC, crcPerBlock, md5);
}
@Override
public DataChecksum.Type getCrcType() {
// default to the one that is understood by all releases.
return DataChecksum.Type.CRC32;
}
}

View File

@ -43,14 +43,16 @@ public class DataChecksum implements Checksum {
public static final int CHECKSUM_NULL = 0;
public static final int CHECKSUM_CRC32 = 1;
public static final int CHECKSUM_CRC32C = 2;
public static final int CHECKSUM_DEFAULT = 3;
public static final int CHECKSUM_DEFAULT = 3;
public static final int CHECKSUM_MIXED = 4;
/** The checksum types */
public static enum Type {
NULL (CHECKSUM_NULL, 0),
CRC32 (CHECKSUM_CRC32, 4),
CRC32C(CHECKSUM_CRC32C, 4),
DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum
DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
public final int id;
public final int size;

View File

@ -29,6 +29,8 @@ import java.util.TreeMap;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSUtil;
@ -43,6 +45,7 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifie
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.DataChecksum;
import org.apache.hadoop.util.StringUtils;
import org.mortbay.util.ajax.JSON;
@ -512,7 +515,21 @@ public class JsonUtil {
final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes"));
final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
final MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum();
final DataChecksum.Type crcType =
MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm);
final MD5MD5CRC32FileChecksum checksum;
// Recreate what DFSClient would have returned.
switch(crcType) {
case CRC32:
checksum = new MD5MD5CRC32GzipFileChecksum();
break;
case CRC32C:
checksum = new MD5MD5CRC32CastagnoliFileChecksum();
break;
default:
throw new IOException("Unknown algorithm: " + algorithm);
}
checksum.readFields(in);
//check algorithm name