svn merge -c 1375450 from trunk for HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file checksum with CRC32C.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1375453 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
08bd69a851
commit
c0b9b996f9
|
@ -101,6 +101,9 @@ Release 2.0.1-alpha - UNRELEASED
|
||||||
|
|
||||||
HADOOP-8686. Fix warnings in native code. (Colin Patrick McCabe via eli)
|
HADOOP-8686. Fix warnings in native code. (Colin Patrick McCabe via eli)
|
||||||
|
|
||||||
|
HADOOP-8239. Add subclasses of MD5MD5CRC32FileChecksum to support file
|
||||||
|
checksum with CRC32C. (Kihwal Lee via szetszwo)
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
||||||
HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname
|
HADOOP-8372. NetUtils.normalizeHostName() incorrectly handles hostname
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs;
|
||||||
|
|
||||||
|
import org.apache.hadoop.io.MD5Hash;
|
||||||
|
import org.apache.hadoop.util.DataChecksum;
|
||||||
|
|
||||||
|
/** For CRC32 with the Castagnoli polynomial */
|
||||||
|
public class MD5MD5CRC32CastagnoliFileChecksum extends MD5MD5CRC32FileChecksum {
|
||||||
|
/** Same as this(0, 0, null) */
|
||||||
|
public MD5MD5CRC32CastagnoliFileChecksum() {
|
||||||
|
this(0, 0, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a MD5FileChecksum */
|
||||||
|
public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
|
||||||
|
super(bytesPerCRC, crcPerBlock, md5);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DataChecksum.Type getCrcType() {
|
||||||
|
// default to the one that is understood by all releases.
|
||||||
|
return DataChecksum.Type.CRC32C;
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,12 +23,17 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.fs.Options.ChecksumOpt;
|
||||||
import org.apache.hadoop.io.MD5Hash;
|
import org.apache.hadoop.io.MD5Hash;
|
||||||
import org.apache.hadoop.io.WritableUtils;
|
import org.apache.hadoop.io.WritableUtils;
|
||||||
|
import org.apache.hadoop.util.DataChecksum;
|
||||||
import org.xml.sax.Attributes;
|
import org.xml.sax.Attributes;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
import org.znerd.xmlenc.XMLOutputter;
|
import org.znerd.xmlenc.XMLOutputter;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
|
||||||
|
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
|
||||||
|
|
||||||
/** MD5 of MD5 of CRC32. */
|
/** MD5 of MD5 of CRC32. */
|
||||||
@InterfaceAudience.LimitedPrivate({"HDFS"})
|
@InterfaceAudience.LimitedPrivate({"HDFS"})
|
||||||
@InterfaceStability.Unstable
|
@InterfaceStability.Unstable
|
||||||
|
@ -54,7 +59,19 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
public String getAlgorithmName() {
|
public String getAlgorithmName() {
|
||||||
return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC + "CRC32";
|
return "MD5-of-" + crcPerBlock + "MD5-of-" + bytesPerCRC +
|
||||||
|
getCrcType().name();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static DataChecksum.Type getCrcTypeFromAlgorithmName(String algorithm)
|
||||||
|
throws IOException {
|
||||||
|
if (algorithm.endsWith(DataChecksum.Type.CRC32.name())) {
|
||||||
|
return DataChecksum.Type.CRC32;
|
||||||
|
} else if (algorithm.endsWith(DataChecksum.Type.CRC32C.name())) {
|
||||||
|
return DataChecksum.Type.CRC32C;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new IOException("Unknown checksum type in " + algorithm);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
|
@ -65,6 +82,16 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
||||||
return WritableUtils.toByteArray(this);
|
return WritableUtils.toByteArray(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** returns the CRC type */
|
||||||
|
public DataChecksum.Type getCrcType() {
|
||||||
|
// default to the one that is understood by all releases.
|
||||||
|
return DataChecksum.Type.CRC32;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ChecksumOpt getChecksumOpt() {
|
||||||
|
return new ChecksumOpt(getCrcType(), bytesPerCRC);
|
||||||
|
}
|
||||||
|
|
||||||
/** {@inheritDoc} */
|
/** {@inheritDoc} */
|
||||||
public void readFields(DataInput in) throws IOException {
|
public void readFields(DataInput in) throws IOException {
|
||||||
bytesPerCRC = in.readInt();
|
bytesPerCRC = in.readInt();
|
||||||
|
@ -86,6 +113,7 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
||||||
if (that != null) {
|
if (that != null) {
|
||||||
xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
|
xml.attribute("bytesPerCRC", "" + that.bytesPerCRC);
|
||||||
xml.attribute("crcPerBlock", "" + that.crcPerBlock);
|
xml.attribute("crcPerBlock", "" + that.crcPerBlock);
|
||||||
|
xml.attribute("crcType", ""+ that.getCrcType().name());
|
||||||
xml.attribute("md5", "" + that.md5);
|
xml.attribute("md5", "" + that.md5);
|
||||||
}
|
}
|
||||||
xml.endTag();
|
xml.endTag();
|
||||||
|
@ -97,16 +125,40 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
||||||
final String bytesPerCRC = attrs.getValue("bytesPerCRC");
|
final String bytesPerCRC = attrs.getValue("bytesPerCRC");
|
||||||
final String crcPerBlock = attrs.getValue("crcPerBlock");
|
final String crcPerBlock = attrs.getValue("crcPerBlock");
|
||||||
final String md5 = attrs.getValue("md5");
|
final String md5 = attrs.getValue("md5");
|
||||||
|
String crcType = attrs.getValue("crcType");
|
||||||
|
DataChecksum.Type finalCrcType;
|
||||||
if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
|
if (bytesPerCRC == null || crcPerBlock == null || md5 == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return new MD5MD5CRC32FileChecksum(Integer.valueOf(bytesPerCRC),
|
// old versions don't support crcType.
|
||||||
Integer.valueOf(crcPerBlock), new MD5Hash(md5));
|
if (crcType == null || crcType == "") {
|
||||||
} catch(Exception e) {
|
finalCrcType = DataChecksum.Type.CRC32;
|
||||||
|
} else {
|
||||||
|
finalCrcType = DataChecksum.Type.valueOf(crcType);
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (finalCrcType) {
|
||||||
|
case CRC32:
|
||||||
|
return new MD5MD5CRC32GzipFileChecksum(
|
||||||
|
Integer.valueOf(bytesPerCRC),
|
||||||
|
Integer.valueOf(crcPerBlock),
|
||||||
|
new MD5Hash(md5));
|
||||||
|
case CRC32C:
|
||||||
|
return new MD5MD5CRC32CastagnoliFileChecksum(
|
||||||
|
Integer.valueOf(bytesPerCRC),
|
||||||
|
Integer.valueOf(crcPerBlock),
|
||||||
|
new MD5Hash(md5));
|
||||||
|
default:
|
||||||
|
// we should never get here since finalCrcType will
|
||||||
|
// hold a valid type or we should have got an exception.
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
|
throw new SAXException("Invalid attributes: bytesPerCRC=" + bytesPerCRC
|
||||||
+ ", crcPerBlock=" + crcPerBlock + ", md5=" + md5, e);
|
+ ", crcPerBlock=" + crcPerBlock + ", crcType=" + crcType
|
||||||
|
+ ", md5=" + md5, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -114,4 +166,4 @@ public class MD5MD5CRC32FileChecksum extends FileChecksum {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return getAlgorithmName() + ":" + md5;
|
return getAlgorithmName() + ":" + md5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs;
|
||||||
|
|
||||||
|
import org.apache.hadoop.io.MD5Hash;
|
||||||
|
import org.apache.hadoop.util.DataChecksum;
|
||||||
|
|
||||||
|
/** For CRC32 with the Gzip polynomial */
|
||||||
|
public class MD5MD5CRC32GzipFileChecksum extends MD5MD5CRC32FileChecksum {
|
||||||
|
/** Same as this(0, 0, null) */
|
||||||
|
public MD5MD5CRC32GzipFileChecksum() {
|
||||||
|
this(0, 0, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Create a MD5FileChecksum */
|
||||||
|
public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
|
||||||
|
super(bytesPerCRC, crcPerBlock, md5);
|
||||||
|
}
|
||||||
|
@Override
|
||||||
|
public DataChecksum.Type getCrcType() {
|
||||||
|
// default to the one that is understood by all releases.
|
||||||
|
return DataChecksum.Type.CRC32;
|
||||||
|
}
|
||||||
|
}
|
|
@ -43,14 +43,16 @@ public class DataChecksum implements Checksum {
|
||||||
public static final int CHECKSUM_NULL = 0;
|
public static final int CHECKSUM_NULL = 0;
|
||||||
public static final int CHECKSUM_CRC32 = 1;
|
public static final int CHECKSUM_CRC32 = 1;
|
||||||
public static final int CHECKSUM_CRC32C = 2;
|
public static final int CHECKSUM_CRC32C = 2;
|
||||||
public static final int CHECKSUM_DEFAULT = 3;
|
public static final int CHECKSUM_DEFAULT = 3;
|
||||||
|
public static final int CHECKSUM_MIXED = 4;
|
||||||
|
|
||||||
/** The checksum types */
|
/** The checksum types */
|
||||||
public static enum Type {
|
public static enum Type {
|
||||||
NULL (CHECKSUM_NULL, 0),
|
NULL (CHECKSUM_NULL, 0),
|
||||||
CRC32 (CHECKSUM_CRC32, 4),
|
CRC32 (CHECKSUM_CRC32, 4),
|
||||||
CRC32C(CHECKSUM_CRC32C, 4),
|
CRC32C(CHECKSUM_CRC32C, 4),
|
||||||
DEFAULT(CHECKSUM_DEFAULT, 0); // This cannot be used to create DataChecksum
|
DEFAULT(CHECKSUM_DEFAULT, 0), // This cannot be used to create DataChecksum
|
||||||
|
MIXED (CHECKSUM_MIXED, 0); // This cannot be used to create DataChecksum
|
||||||
|
|
||||||
public final int id;
|
public final int id;
|
||||||
public final int size;
|
public final int size;
|
||||||
|
|
|
@ -29,6 +29,8 @@ import java.util.TreeMap;
|
||||||
import org.apache.hadoop.fs.ContentSummary;
|
import org.apache.hadoop.fs.ContentSummary;
|
||||||
import org.apache.hadoop.fs.FileChecksum;
|
import org.apache.hadoop.fs.FileChecksum;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum;
|
||||||
|
import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum;
|
||||||
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
|
import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.hdfs.DFSUtil;
|
import org.apache.hadoop.hdfs.DFSUtil;
|
||||||
|
@ -43,6 +45,7 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifie
|
||||||
import org.apache.hadoop.ipc.RemoteException;
|
import org.apache.hadoop.ipc.RemoteException;
|
||||||
import org.apache.hadoop.security.token.Token;
|
import org.apache.hadoop.security.token.Token;
|
||||||
import org.apache.hadoop.security.token.TokenIdentifier;
|
import org.apache.hadoop.security.token.TokenIdentifier;
|
||||||
|
import org.apache.hadoop.util.DataChecksum;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.mortbay.util.ajax.JSON;
|
import org.mortbay.util.ajax.JSON;
|
||||||
|
|
||||||
|
@ -512,7 +515,21 @@ public class JsonUtil {
|
||||||
final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes"));
|
final byte[] bytes = StringUtils.hexStringToByte((String)m.get("bytes"));
|
||||||
|
|
||||||
final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
|
final DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes));
|
||||||
final MD5MD5CRC32FileChecksum checksum = new MD5MD5CRC32FileChecksum();
|
final DataChecksum.Type crcType =
|
||||||
|
MD5MD5CRC32FileChecksum.getCrcTypeFromAlgorithmName(algorithm);
|
||||||
|
final MD5MD5CRC32FileChecksum checksum;
|
||||||
|
|
||||||
|
// Recreate what DFSClient would have returned.
|
||||||
|
switch(crcType) {
|
||||||
|
case CRC32:
|
||||||
|
checksum = new MD5MD5CRC32GzipFileChecksum();
|
||||||
|
break;
|
||||||
|
case CRC32C:
|
||||||
|
checksum = new MD5MD5CRC32CastagnoliFileChecksum();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IOException("Unknown algorithm: " + algorithm);
|
||||||
|
}
|
||||||
checksum.readFields(in);
|
checksum.readFields(in);
|
||||||
|
|
||||||
//check algorithm name
|
//check algorithm name
|
||||||
|
|
Loading…
Reference in New Issue