svn merge -c 1375450 from trunk for HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum with CRC32C.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1375453 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 2012-08-21 09:49:00 +00:00
parent 08bd69a851
commit c0b9b996f9
6 changed files with 164 additions and 9 deletions

CHANGES.txt

@@ -101,6 +101,9 @@ Release 2.0.1-alpha - UNRELEASED
     HADOOP-8686. Fix warnings in native code. (Colin Patrick McCabe via eli)
 
+    HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file
+    checksum with CRC32C. (Kihwal Lee via szetszwo)
+
   BUG FIXES
 
     HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname

org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java (new file)

@@ -0,0 +1,41 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs;

import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.DataChecksum;

/** For CRC32 with the Castagnoli polynomial */
public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum {
  /** Same as this(0, 0, null) */
  public MD5MD5CRC32CastagnoliFileChecksum() {
    this(0, 0, null);
  }

  /** Create an MD5MD5CRC32CastagnoliFileChecksum */
  public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
    super(bytesPerCRC, crcPerBlock, md5);
  }

  @Override
  public DataChecksum.Type getCrcType() {
    // CRC32C: the Castagnoli polynomial.
    return DataChecksum.Type.CRC32C;
  }
}
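In effect the subclass only overrides getCrcType(); everything else, including the serialized form, comes from MD5MD5CRC32FileChecksum. A minimal sketch of what a caller now sees (the 512/262144 values and the empty-buffer MD5 below are arbitrary illustrations, not values from this commit):

import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
import org.apache.hadoop.io.MD5Hash;

public class CrcTypeDemo {
  public static void main(String[] args) {
    // bytesPerCRC = 512, crcPerBlock = 262144; MD5 over an empty buffer.
    MD5MD5CRC32FileChecksum checksum =
        new MD5MD5CRC32CastagnoliFileChecksum(512, 262144,
            MD5Hash.digest(new byte[0]));

    // The CRC type is now the suffix of the algorithm name:
    // "MD5-of-262144MD5-of-512CRC32C"
    System.out.println(checksum.getAlgorithmName());
    System.out.println(checksum.getCrcType());  // CRC32C
  }
}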

org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java

@@ -23,12 +23,17 @@
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.Options.ChecksumOpt;
 import org.apache.hadoop.io.MD5Hash;
 import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.util.DataChecksum;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.znerd.xmlenc.XMLOutputter;
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
 
 /** MD5 of MD5 of CRC32. */
 @InterfaceAudience.LimitedPrivate({"HDFS"})
 @InterfaceStability.Unstable
@@ -54,7 +59,19 @@ public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
   /** {@inheritDoc} */
   public String getAlgorithmName() {
-    return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
+    return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
+        getCrcType().name();
+  }
+
+  public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm)
+      throws IOException {
+    if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) {
+      return DataChecksum.Type.CRC32;
+    } else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) {
+      return DataChecksum.Type.CRC32C;
+    }
+    throw new IOException("Unknown checksum type in " + algorithm);
   }
 
 /** {@inheritDoc} */
@@ -65,6 +82,16 @@ public byte[] getBytes() {
     return WritableUtils.toByteArray(this);
   }
 
+  /** returns the CRC type */
+  public DataChecksum.Type getCrcType() {
+    // default to the one that is understood by all releases.
+    return DataChecksum.Type.CRC32;
+  }
+
+  public ChecksumOpt getChecksumOpt() {
+    return new ChecksumOpt(getCrcType(), bytesPerCRC);
+  }
+
   /** {@inheritDoc} */
   public void readFields(DataInput in) throws IOException {
     bytesPerCRC = in.readInt();
@@ -86,6 +113,7 @@ public static void write(XMLOutputter xml, MD5MD5CRC32FileChecksum that
     if (that != null) {
       xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
       xml.attribute("crcPerBlock", "" + that.crcPerBlock);
+      xml.attribute("crcType", "" + that.getCrcType().name());
       xml.attribute("md5", "" + that.md5);
     }
     xml.endTag();
@@ -97,16 +125,40 @@ public static MD5MD5CRC32FileChecksum valueOf(Attributes attrs
     final String bytesPerCRC = attrs.getValue("bytesPerCRC");
     final String crcPerBlock = attrs.getValue("crcPerBlock");
     final String md5 = attrs.getValue("md5");
+    String crcType = attrs.getValue("crcType");
+    DataChecksum.Type finalCrcType;
     if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
       return null;
     }
 
     try {
-      return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
-          Integer.valueOf(crcPerBlock), new MD5Hash(md5));
-    } catch(Exception e) {
+      // old versions don't support crcType.
+      if (crcType == null || crcType.equals("")) {
+        finalCrcType = DataChecksum.Type.CRC32;
+      } else {
+        finalCrcType = DataChecksum.Type.valueOf(crcType);
+      }
+
+      switch (finalCrcType) {
+      case CRC32:
+        return new MD5MD5CRC32GzipFileChecksum(
+            Integer.valueOf(bytesPerCRC),
+            Integer.valueOf(crcPerBlock),
+            new MD5Hash(md5));
+      case CRC32C:
+        return new MD5MD5CRC32CastagnoliFileChecksum(
+            Integer.valueOf(bytesPerCRC),
+            Integer.valueOf(crcPerBlock),
+            new MD5Hash(md5));
+      default:
+        // we should never get here since finalCrcType will
+        // hold a valid type or we should have got an exception.
+        return null;
+      }
+    } catch (Exception e) {
       throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
-          + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
+          + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType
+          + ", md5=" + md5, e);
     }
   }
@@ -114,4 +166,4 @@ public static MD5MD5CRC32FileChecksum valueOf(Attributes attrs
   public String toString() {
     return getAlgorithmName() + ":" + md5;
   }
 }
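A quick sketch of the new name-based dispatch added above. The algorithm strings below are the format getAlgorithmName() now produces; note the ordering of the endsWith checks is safe because "...CRC32C" does not end with "CRC32":

import java.io.IOException;

import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;

public class AlgorithmNameDemo {
  public static void main(String[] args) throws IOException {
    // CRC32 is checked first, but a "...CRC32C" name does not match it,
    // so both suffixes resolve to the right enum constant.
    System.out.println(MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(
        "MD5-of-262144MD5-of-512CRC32"));    // CRC32
    System.out.println(MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(
        "MD5-of-262144MD5-of-512CRC32C"));   // CRC32C

    // Any other suffix throws IOException("Unknown checksum type in ...").
  }
}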

org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java (new file)

@@ -0,0 +1,40 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs;

import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.util.DataChecksum;

/** For CRC32 with the Gzip polynomial */
public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum {
  /** Same as this(0, 0, null) */
  public MD5MD5CRC32GzipFileChecksum() {
    this(0, 0, null);
  }

  /** Create an MD5MD5CRC32GzipFileChecksum */
  public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
    super(bytesPerCRC, crcPerBlock, md5);
  }

  @Override
  public DataChecksum.Type getCrcType() {
    // default to the one that is understood by all releases.
    return DataChecksum.Type.CRC32;
  }
}
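One consequence worth noting: readFields()/getBytes() still carry only bytesPerCRC, crcPerBlock and the MD5, so the CRC type travels out of band, in the algorithm name or the new crcType XML attribute. A sketch under that assumption (dummy values again):

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
import org.apache.hadoop.io.MD5Hash;

public class WireFormatDemo {
  public static void main(String[] args) throws IOException {
    MD5MD5CRC32GzipFileChecksum src =
        new MD5MD5CRC32GzipFileChecksum(512, 262144,
            MD5Hash.digest(new byte[0]));

    // The serialized payload has no crcType field, so the receiver must
    // already know which subclass to instantiate (as JsonUtil does below).
    MD5MD5CRC32GzipFileChecksum dst = new MD5MD5CRC32GzipFileChecksum();
    dst.readFields(new DataInputStream(
        new ByteArrayInputStream(src.getBytes())));

    System.out.println(dst.getCrcType());  // CRC32
  }
}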

org/apache/hadoop/util/DataChecksum.java

@@ -43,14 +43,16 @@ public class DataChecksum implements Checksum {
   public static final int CHECKSUM_NULL    = 0;
   public static final int CHECKSUM_CRC32   = 1;
   public static final int CHECKSUM_CRC32C  = 2;
   public static final int CHECKSUM_DEFAULT = 3;
+  public static final int CHECKSUM_MIXED   = 4;
 
   /** The checksum types */
   public static enum Type {
     NULL  (CHECKSUM_NULL, 0),
     CRC32 (CHECKSUM_CRC32, 4),
     CRC32C(CHECKSUM_CRC32C, 4),
-    DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum
+    DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
+    MIXED (CHECKSUM_MIXED, 0);    // This cannot be used to create DataChecksum
 
     public final int id;
     public final int size;
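MIXED, like DEFAULT, is a marker value with no concrete Checksum behind it, presumably for checksums that span more than one CRC type. A sketch, assuming newDataChecksum(Type, int) keeps its existing behavior of returning null for types it cannot instantiate:

import org.apache.hadoop.util.DataChecksum;

public class MixedTypeDemo {
  public static void main(String[] args) {
    DataChecksum crc32c =
        DataChecksum.newDataChecksum(DataChecksum.Type.CRC32C, 512);
    DataChecksum mixed =
        DataChecksum.newDataChecksum(DataChecksum.Type.MIXED, 512);

    System.out.println(crc32c != null);  // true: a usable checksum
    System.out.println(mixed == null);   // true: MIXED is a marker only
  }
}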

org/apache/hadoop/hdfs/web/JsonUtil.java

@@ -29,6 +29,8 @@
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.fs.FileChecksum;
 import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
+import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
 import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DFSUtil;
@@ -43,6 +45,7 @@
 import org.apache.hadoop.ipc.RemoteException;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.TokenIdentifier;
+import org.apache.hadoop.util.DataChecksum;
 import org.apache.hadoop.util.StringUtils;
 import org.mortbay.util.ajax.JSON;
@@ -512,7 +515,21 @@ public static MD5MD5CRC32FileChecksum toMD5MD5CRC32FileChecksum(
     final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes"));
 
     final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
-    final MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum();
+    final DataChecksum.Type crcType =
+        MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm);
+    final MD5MD5CRC32FileChecksum checksum;
+
+    // Recreate what DFSClient would have returned.
+    switch (crcType) {
+    case CRC32:
+      checksum = new MD5MD5CRC32GzipFileChecksum();
+      break;
+    case CRC32C:
+      checksum = new MD5MD5CRC32CastagnoliFileChecksum();
+      break;
+    default:
+      throw new IOException("Unknown algorithm: " + algorithm);
+    }
     checksum.readFields(in);
 
     //check algorithm name