HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)

This commit is contained in:
Allen Wittenauer 2014-09-24 15:38:18 -07:00
parent cbf0ae742a
commit 3cde37c991
8 changed files with 990 additions and 110 deletions

View File

@@ -543,6 +543,8 @@ Release 2.6.0 - UNRELEASED
 HADOOP-11017. KMS delegation token secret manager should be able to use
 zookeeper as store. (asuresh via tucu)
 
+HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)
+
 OPTIMIZATIONS
 
 HADOOP-10838. Byte array native checksumming. (James Thomas via todd)

View File

@@ -101,7 +101,7 @@ public final class CopyListingFileStatus extends FileStatus {
* @return Map<String, byte[]> containing all xAttrs * @return Map<String, byte[]> containing all xAttrs
*/ */
public Map<String, byte[]> getXAttrs() { public Map<String, byte[]> getXAttrs() {
return xAttrs; return xAttrs != null ? xAttrs : Collections.<String, byte[]>emptyMap();
} }
/** /**

View File

@@ -37,18 +37,21 @@ public enum DistCpOptionSwitch {
/** /**
* Preserves status of file/path in the target. * Preserves status of file/path in the target.
* Default behavior with -p, is to preserve replication, * Default behavior with -p, is to preserve replication,
* block size, user, group, permission and checksum type on the target file. * block size, user, group, permission, checksum type and timestamps on the
* Note that when preserving checksum type, block size is also preserved. * target file. Note that when preserving checksum type, block size is also
* preserved.
* *
* If any of the optional switches are present among rbugpc, then * @see PRESERVE_STATUS_DEFAULT
*
* If any of the optional switches are present among rbugpcaxt, then
* only the corresponding file attribute is preserved. * only the corresponding file attribute is preserved.
*
*/ */
PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS, PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
new Option("p", true, "preserve status (rbugpcax)(replication, " + new Option("p", true, "preserve status (rbugpcaxt)(replication, " +
"block-size, user, group, permission, checksum-type, ACL, XATTR). " + "block-size, user, group, permission, checksum-type, ACL, XATTR, " +
"If -p is specified with no <arg>, then preserves replication, " + "timestamps). If -p is specified with no <arg>, then preserves " +
"block size, user, group, permission and checksum type." + "replication, block size, user, group, permission, checksum type " +
"and timestamps. " +
"raw.* xattrs are preserved when both the source and destination " + "raw.* xattrs are preserved when both the source and destination " +
"paths are in the /.reserved/raw hierarchy (HDFS only). raw.* xattr" + "paths are in the /.reserved/raw hierarchy (HDFS only). raw.* xattr" +
"preservation is independent of the -p flag." + "preservation is independent of the -p flag." +
@@ -166,7 +169,7 @@ public enum DistCpOptionSwitch {
BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB, BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
new Option("bandwidth", true, "Specify bandwidth per map in MB")); new Option("bandwidth", true, "Specify bandwidth per map in MB"));
static final String PRESERVE_STATUS_DEFAULT = "-prbugpc"; public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct";
private final String confLabel; private final String confLabel;
private final Option option; private final Option option;

View File

@@ -68,7 +68,7 @@ public class DistCpOptions {
private boolean targetPathExists = true; private boolean targetPathExists = true;
public static enum FileAttribute{ public static enum FileAttribute{
REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR; REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR, TIMES;
public static FileAttribute getAttribute(char symbol) { public static FileAttribute getAttribute(char symbol) {
for (FileAttribute attribute : values()) { for (FileAttribute attribute : values()) {

View File

@@ -18,39 +18,39 @@
package org.apache.hadoop.tools.util; package org.apache.hadoop.tools.util;
import com.google.common.collect.Maps; import java.io.IOException;
import java.net.InetAddress;
import java.net.URI;
import java.net.UnknownHostException;
import java.text.DecimalFormat;
import java.util.EnumSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.XAttr; import org.apache.hadoop.fs.XAttr;
import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclUtil; import org.apache.hadoop.fs.permission.AclUtil;
import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
import org.apache.hadoop.tools.CopyListing.XAttrsNotSupportedException; import org.apache.hadoop.tools.CopyListing.XAttrsNotSupportedException;
import org.apache.hadoop.tools.CopyListingFileStatus; import org.apache.hadoop.tools.CopyListingFileStatus;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.DistCpOptions.FileAttribute; import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
import org.apache.hadoop.tools.mapred.UniformSizeInputFormat; import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.mapreduce.InputFormat;
import java.io.IOException; import com.google.common.collect.Maps;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.text.DecimalFormat;
import java.net.URI;
import java.net.InetAddress;
import java.net.UnknownHostException;
/** /**
* Utility functions used in DistCp. * Utility functions used in DistCp.
@@ -163,7 +163,7 @@ public class DistCpUtils {
} }
/** /**
* Un packs preservation attribute string containing the first character of * Unpacks preservation attribute string containing the first character of
* each preservation attribute back to a set of attributes to preserve * each preservation attribute back to a set of attributes to preserve
* @param attributes - Attribute string * @param attributes - Attribute string
* @return - Attribute set * @return - Attribute set
@@ -209,7 +209,7 @@ public class DistCpUtils {
if (!srcAcl.equals(targetAcl)) { if (!srcAcl.equals(targetAcl)) {
targetFS.setAcl(path, srcAcl); targetFS.setAcl(path, srcAcl);
} }
// setAcl can't preserve sticky bit, so also call setPermission if needed. // setAcl doesn't preserve sticky bit, so also call setPermission if needed.
if (srcFileStatus.getPermission().getStickyBit() != if (srcFileStatus.getPermission().getStickyBit() !=
targetFileStatus.getPermission().getStickyBit()) { targetFileStatus.getPermission().getStickyBit()) {
targetFS.setPermission(path, srcFileStatus.getPermission()); targetFS.setPermission(path, srcFileStatus.getPermission());
@@ -225,19 +225,17 @@ public class DistCpUtils {
Map<String, byte[]> srcXAttrs = srcFileStatus.getXAttrs(); Map<String, byte[]> srcXAttrs = srcFileStatus.getXAttrs();
Map<String, byte[]> targetXAttrs = getXAttrs(targetFS, path); Map<String, byte[]> targetXAttrs = getXAttrs(targetFS, path);
if (srcXAttrs != null && !srcXAttrs.equals(targetXAttrs)) { if (srcXAttrs != null && !srcXAttrs.equals(targetXAttrs)) {
Iterator<Entry<String, byte[]>> iter = srcXAttrs.entrySet().iterator(); for (Entry<String, byte[]> entry : srcXAttrs.entrySet()) {
while (iter.hasNext()) { String xattrName = entry.getKey();
Entry<String, byte[]> entry = iter.next();
final String xattrName = entry.getKey();
if (xattrName.startsWith(rawNS) || preserveXAttrs) { if (xattrName.startsWith(rawNS) || preserveXAttrs) {
targetFS.setXAttr(path, entry.getKey(), entry.getValue()); targetFS.setXAttr(path, xattrName, entry.getValue());
} }
} }
} }
} }
if (attributes.contains(FileAttribute.REPLICATION) && ! targetFileStatus.isDirectory() && if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
srcFileStatus.getReplication() != targetFileStatus.getReplication()) { (srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
targetFS.setReplication(path, srcFileStatus.getReplication()); targetFS.setReplication(path, srcFileStatus.getReplication());
} }
@@ -256,6 +254,12 @@ public class DistCpUtils {
if (chown) { if (chown) {
targetFS.setOwner(path, user, group); targetFS.setOwner(path, user, group);
} }
if (attributes.contains(FileAttribute.TIMES)) {
targetFS.setTimes(path,
srcFileStatus.getModificationTime(),
srcFileStatus.getAccessTime());
}
} }
/** /**

View File

@@ -497,7 +497,7 @@ public class TestOptionsParser {
attribIterator.next(); attribIterator.next();
i++; i++;
} }
Assert.assertEquals(i, 6); Assert.assertEquals(i, DistCpOptionSwitch.PRESERVE_STATUS_DEFAULT.length() - 2);
try { try {
OptionsParser.parse(new String[] { OptionsParser.parse(new String[] {

View File

@@ -590,6 +590,7 @@ public class TestCopyMapper {
EnumSet.allOf(DistCpOptions.FileAttribute.class); EnumSet.allOf(DistCpOptions.FileAttribute.class);
preserveStatus.remove(DistCpOptions.FileAttribute.ACL); preserveStatus.remove(DistCpOptions.FileAttribute.ACL);
preserveStatus.remove(DistCpOptions.FileAttribute.XATTR); preserveStatus.remove(DistCpOptions.FileAttribute.XATTR);
preserveStatus.remove(DistCpOptions.FileAttribute.TIMES);
context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS, context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
DistCpUtils.packAttributes(preserveStatus)); DistCpUtils.packAttributes(preserveStatus));