From f2dd818201402d0ca8a7049ba7abf77188443a64 Mon Sep 17 00:00:00 2001 From: Tsz-wo Sze Date: Tue, 21 Aug 2012 09:44:46 +0000 Subject: [PATCH] HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum with CRC32C. Contributed by Kihwal Lee git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1375450 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + .../fs/MD5MD5CRC32CastagnoliFileChecksum.java | 41 ++++++++++++ .../hadoop/fs/MD5MD5CRC32FileChecksum.java | 64 +++++++++++++++++-- .../fs/MD5MD5CRC32GzipFileChecksum.java | 40 ++++++++++++ .../org/apache/hadoop/util/DataChecksum.java | 6 +- .../org/apache/hadoop/hdfs/web/JsonUtil.java | 19 +++++- 6 files changed, 164 insertions(+), 9 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 55b08b10a82..8cc69d2b6ba 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -304,6 +304,9 @@ Branch-2 ( Unreleased changes ) HADOOP-8686. Fix warnings in native code. (Colin Patrick McCabe via eli) + HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file + checksum with CRC32C. (Kihwal Lee via szetszwo) + BUG FIXES HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java new file mode 100644 index 00000000000..5a4a6a97cc4 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.util.DataChecksum; + +/** For CRC32 with the Castagnoli polynomial */ +public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum { + /** Same as this(0, 0, null) */ + public MD5MD5CRC32CastagnoliFileChecksum() { + this(0, 0, null); + } + + /** Create a MD5FileChecksum */ + public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { + super(bytesPerCRC, crcPerBlock, md5); + } + + @Override + public DataChecksum.Type getCrcType() { + // default to the one that is understood by all releases. + return DataChecksum.Type.CRC32C; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java index af84f398eca..1c697b7f521 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java @@ -23,12 +23,17 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Options.ChecksumOpt; import org.apache.hadoop.io.MD5Hash; import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.util.DataChecksum; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.znerd.xmlenc.XMLOutputter; +import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum; +import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum; + /** MD5 of MD5 of CRC32. */ @InterfaceAudience.LimitedPrivate({"HDFS"}) @InterfaceStability.Unstable @@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum { /** {@inheritDoc} */ public String getAlgorithmName() { - return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32"; + return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + + getCrcType().name(); + } + + public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm) + throws IOException { + if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) { + return DataChecksum.Type.CRC32; + } else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) { + return DataChecksum.Type.CRC32C; + } + + throw new IOException("Unknown checksum type in " + algorithm); } /** {@inheritDoc} */ @@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum { return WritableUtils.toByteArray(this); } + /** returns the CRC type */ + public DataChecksum.Type getCrcType() { + // default to the one that is understood by all releases. + return DataChecksum.Type.CRC32; + } + + public ChecksumOpt getChecksumOpt() { + return new ChecksumOpt(getCrcType(), bytesPerCRC); + } + /** {@inheritDoc} */ public void readFields(DataInput in) throws IOException { bytesPerCRC = in.readInt(); @@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum { if (that != null) { xml.attribute("bytesPerCRC", "" + that.bytesPerCRC); xml.attribute("crcPerBlock", "" + that.crcPerBlock); + xml.attribute("crcType", ""+ that.getCrcType().name()); xml.attribute("md5", "" + that.md5); } xml.endTag(); @@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum { final String bytesPerCRC = attrs.getValue("bytesPerCRC"); final String crcPerBlock = attrs.getValue("crcPerBlock"); final String md5 = attrs.getValue("md5"); + String crcType = attrs.getValue("crcType"); + DataChecksum.Type finalCrcType; if (bytesPerCRC == null || crcPerBlock == null || md5 == null) { return null; } try { - return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC), - Integer.valueOf(crcPerBlock), new MD5Hash(md5)); - } catch(Exception e) { + // old versions don't support crcType. + if (crcType == null || crcType == "") { + finalCrcType = DataChecksum.Type.CRC32; + } else { + finalCrcType = DataChecksum.Type.valueOf(crcType); + } + + switch (finalCrcType) { + case CRC32: + return new MD5MD5CRC32GzipFileChecksum( + Integer.valueOf(bytesPerCRC), + Integer.valueOf(crcPerBlock), + new MD5Hash(md5)); + case CRC32C: + return new MD5MD5CRC32CastagnoliFileChecksum( + Integer.valueOf(bytesPerCRC), + Integer.valueOf(crcPerBlock), + new MD5Hash(md5)); + default: + // we should never get here since finalCrcType will + // hold a valid type or we should have got an exception. + return null; + } + } catch (Exception e) { throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC - + ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e); + + ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType + + ", md5=" + md5, e); } } @@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum { public String toString() { return getAlgorithmName() + ":" + md5; } -} \ No newline at end of file +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java new file mode 100644 index 00000000000..5164d0200d2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import org.apache.hadoop.io.MD5Hash; +import org.apache.hadoop.util.DataChecksum; + +/** For CRC32 with the Gzip polynomial */ +public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum { + /** Same as this(0, 0, null) */ + public MD5MD5CRC32GzipFileChecksum() { + this(0, 0, null); + } + + /** Create a MD5FileChecksum */ + public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) { + super(bytesPerCRC, crcPerBlock, md5); + } + @Override + public DataChecksum.Type getCrcType() { + // default to the one that is understood by all releases. + return DataChecksum.Type.CRC32; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java index 2d41f82cd39..4813847e845 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/DataChecksum.java @@ -43,14 +43,16 @@ public class DataChecksum implements Checksum { public static final int CHECKSUM_NULL = 0; public static final int CHECKSUM_CRC32 = 1; public static final int CHECKSUM_CRC32C = 2; - public static final int CHECKSUM_DEFAULT = 3; + public static final int CHECKSUM_DEFAULT = 3; + public static final int CHECKSUM_MIXED = 4; /** The checksum types */ public static enum Type { NULL (CHECKSUM_NULL, 0), CRC32 (CHECKSUM_CRC32, 4), CRC32C(CHECKSUM_CRC32C, 4), - DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum + DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum + MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum public final int id; public final int size; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 912f362728c..f251e34c13a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -29,6 +29,8 @@ import java.util.TreeMap; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileChecksum; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum; +import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum; import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSUtil; @@ -43,6 +45,7 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifie import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.StringUtils; import org.mortbay.util.ajax.JSON; @@ -512,7 +515,21 @@ public class JsonUtil { final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes")); final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes)); - final MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum(); + final DataChecksum.Type crcType = + MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm); + final MD5MD5CRC32FileChecksum checksum; + + // Recreate what DFSClient would have returned. + switch(crcType) { + case CRC32: + checksum = new MD5MD5CRC32GzipFileChecksum(); + break; + case CRC32C: + checksum = new MD5MD5CRC32CastagnoliFileChecksum(); + break; + default: + throw new IOException("Unknown algorithm: " + algorithm); + } checksum.readFields(in); //check algorithm name