HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)
This commit is contained in:
parent
cbf0ae742a
commit
3cde37c991
|
@ -543,6 +543,8 @@ Release 2.6.0 - UNRELEASED
|
|||
HADOOP-11017. KMS delegation token secret manager should be able to use
|
||||
zookeeper as store. (asuresh via tucu)
|
||||
|
||||
HADOOP-11009. Add Timestamp Preservation to DistCp (Gary Steelman via aw)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
HADOOP-10838. Byte array native checksumming. (James Thomas via todd)
|
||||
|
|
|
@ -101,7 +101,7 @@ public final class CopyListingFileStatus extends FileStatus {
|
|||
* @return Map<String, byte[]> containing all xAttrs
|
||||
*/
|
||||
public Map<String, byte[]> getXAttrs() {
|
||||
return xAttrs;
|
||||
return xAttrs != null ? xAttrs : Collections.<String, byte[]>emptyMap();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -37,18 +37,21 @@ public enum DistCpOptionSwitch {
|
|||
/**
|
||||
* Preserves status of file/path in the target.
|
||||
* Default behavior with -p, is to preserve replication,
|
||||
* block size, user, group, permission and checksum type on the target file.
|
||||
* Note that when preserving checksum type, block size is also preserved.
|
||||
* block size, user, group, permission, checksum type and timestamps on the
|
||||
* target file. Note that when preserving checksum type, block size is also
|
||||
* preserved.
|
||||
*
|
||||
* If any of the optional switches are present among rbugpc, then
|
||||
* @see #PRESERVE_STATUS_DEFAULT
|
||||
*
|
||||
* If any of the optional switches are present among rbugpcaxt, then
|
||||
* only the corresponding file attribute is preserved.
|
||||
*
|
||||
*/
|
||||
PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
|
||||
new Option("p", true, "preserve status (rbugpcax)(replication, " +
|
||||
"block-size, user, group, permission, checksum-type, ACL, XATTR). " +
|
||||
"If -p is specified with no <arg>, then preserves replication, " +
|
||||
"block size, user, group, permission and checksum type." +
|
||||
new Option("p", true, "preserve status (rbugpcaxt)(replication, " +
|
||||
"block-size, user, group, permission, checksum-type, ACL, XATTR, " +
|
||||
"timestamps). If -p is specified with no <arg>, then preserves " +
|
||||
"replication, block size, user, group, permission, checksum type " +
|
||||
"and timestamps. " +
|
||||
"raw.* xattrs are preserved when both the source and destination " +
|
||||
"paths are in the /.reserved/raw hierarchy (HDFS only). raw.* xattr" +
|
||||
"preservation is independent of the -p flag." +
|
||||
|
@ -166,7 +169,7 @@ public enum DistCpOptionSwitch {
|
|||
BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB,
|
||||
new Option("bandwidth", true, "Specify bandwidth per map in MB"));
|
||||
|
||||
static final String PRESERVE_STATUS_DEFAULT = "-prbugpc";
|
||||
public static final String PRESERVE_STATUS_DEFAULT = "-prbugpct";
|
||||
private final String confLabel;
|
||||
private final Option option;
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ public class DistCpOptions {
|
|||
private boolean targetPathExists = true;
|
||||
|
||||
public static enum FileAttribute{
|
||||
REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR;
|
||||
REPLICATION, BLOCKSIZE, USER, GROUP, PERMISSION, CHECKSUMTYPE, ACL, XATTR, TIMES;
|
||||
|
||||
public static FileAttribute getAttribute(char symbol) {
|
||||
for (FileAttribute attribute : values()) {
|
||||
|
|
|
@ -18,39 +18,39 @@
|
|||
|
||||
package org.apache.hadoop.tools.util;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import java.io.IOException;
|
||||
import java.net.InetAddress;
|
||||
import java.net.URI;
|
||||
import java.net.UnknownHostException;
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.FileChecksum;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.FileChecksum;
|
||||
import org.apache.hadoop.fs.XAttr;
|
||||
import org.apache.hadoop.fs.permission.AclEntry;
|
||||
import org.apache.hadoop.fs.permission.AclUtil;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.apache.hadoop.io.SequenceFile;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.InputFormat;
|
||||
import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
|
||||
import org.apache.hadoop.tools.CopyListing.XAttrsNotSupportedException;
|
||||
import org.apache.hadoop.tools.CopyListingFileStatus;
|
||||
import org.apache.hadoop.tools.DistCpOptions;
|
||||
import org.apache.hadoop.tools.DistCpOptions.FileAttribute;
|
||||
import org.apache.hadoop.tools.mapred.UniformSizeInputFormat;
|
||||
import org.apache.hadoop.tools.CopyListing.AclsNotSupportedException;
|
||||
import org.apache.hadoop.tools.DistCpOptions;
|
||||
import org.apache.hadoop.mapreduce.InputFormat;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.text.DecimalFormat;
|
||||
import java.net.URI;
|
||||
import java.net.InetAddress;
|
||||
import java.net.UnknownHostException;
|
||||
import com.google.common.collect.Maps;
|
||||
|
||||
/**
|
||||
* Utility functions used in DistCp.
|
||||
|
@ -163,7 +163,7 @@ public class DistCpUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Un packs preservation attribute string containing the first character of
|
||||
* Unpacks preservation attribute string containing the first character of
|
||||
* each preservation attribute back to a set of attributes to preserve
|
||||
* @param attributes - Attribute string
|
||||
* @return - Attribute set
|
||||
|
@ -209,7 +209,7 @@ public class DistCpUtils {
|
|||
if (!srcAcl.equals(targetAcl)) {
|
||||
targetFS.setAcl(path, srcAcl);
|
||||
}
|
||||
// setAcl can't preserve sticky bit, so also call setPermission if needed.
|
||||
// setAcl doesn't preserve sticky bit, so also call setPermission if needed.
|
||||
if (srcFileStatus.getPermission().getStickyBit() !=
|
||||
targetFileStatus.getPermission().getStickyBit()) {
|
||||
targetFS.setPermission(path, srcFileStatus.getPermission());
|
||||
|
@ -225,30 +225,28 @@ public class DistCpUtils {
|
|||
Map<String, byte[]> srcXAttrs = srcFileStatus.getXAttrs();
|
||||
Map<String, byte[]> targetXAttrs = getXAttrs(targetFS, path);
|
||||
if (srcXAttrs != null && !srcXAttrs.equals(targetXAttrs)) {
|
||||
Iterator<Entry<String, byte[]>> iter = srcXAttrs.entrySet().iterator();
|
||||
while (iter.hasNext()) {
|
||||
Entry<String, byte[]> entry = iter.next();
|
||||
final String xattrName = entry.getKey();
|
||||
for (Entry<String, byte[]> entry : srcXAttrs.entrySet()) {
|
||||
String xattrName = entry.getKey();
|
||||
if (xattrName.startsWith(rawNS) || preserveXAttrs) {
|
||||
targetFS.setXAttr(path, entry.getKey(), entry.getValue());
|
||||
targetFS.setXAttr(path, xattrName, entry.getValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (attributes.contains(FileAttribute.REPLICATION) && ! targetFileStatus.isDirectory() &&
|
||||
srcFileStatus.getReplication() != targetFileStatus.getReplication()) {
|
||||
if (attributes.contains(FileAttribute.REPLICATION) && !targetFileStatus.isDirectory() &&
|
||||
(srcFileStatus.getReplication() != targetFileStatus.getReplication())) {
|
||||
targetFS.setReplication(path, srcFileStatus.getReplication());
|
||||
}
|
||||
|
||||
if (attributes.contains(FileAttribute.GROUP) &&
|
||||
!group.equals(srcFileStatus.getGroup())) {
|
||||
!group.equals(srcFileStatus.getGroup())) {
|
||||
group = srcFileStatus.getGroup();
|
||||
chown = true;
|
||||
}
|
||||
|
||||
if (attributes.contains(FileAttribute.USER) &&
|
||||
!user.equals(srcFileStatus.getOwner())) {
|
||||
!user.equals(srcFileStatus.getOwner())) {
|
||||
user = srcFileStatus.getOwner();
|
||||
chown = true;
|
||||
}
|
||||
|
@ -256,6 +254,12 @@ public class DistCpUtils {
|
|||
if (chown) {
|
||||
targetFS.setOwner(path, user, group);
|
||||
}
|
||||
|
||||
if (attributes.contains(FileAttribute.TIMES)) {
|
||||
targetFS.setTimes(path,
|
||||
srcFileStatus.getModificationTime(),
|
||||
srcFileStatus.getAccessTime());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -497,7 +497,7 @@ public class TestOptionsParser {
|
|||
attribIterator.next();
|
||||
i++;
|
||||
}
|
||||
Assert.assertEquals(i, 6);
|
||||
Assert.assertEquals(i, DistCpOptionSwitch.PRESERVE_STATUS_DEFAULT.length() - 2);
|
||||
|
||||
try {
|
||||
OptionsParser.parse(new String[] {
|
||||
|
|
|
@ -590,6 +590,7 @@ public class TestCopyMapper {
|
|||
EnumSet.allOf(DistCpOptions.FileAttribute.class);
|
||||
preserveStatus.remove(DistCpOptions.FileAttribute.ACL);
|
||||
preserveStatus.remove(DistCpOptions.FileAttribute.XATTR);
|
||||
preserveStatus.remove(DistCpOptions.FileAttribute.TIMES);
|
||||
|
||||
context.getConfiguration().set(DistCpConstants.CONF_LABEL_PRESERVE_STATUS,
|
||||
DistCpUtils.packAttributes(preserveStatus));
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue