diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 4cda7cdcb5a..33db86ed81e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -26,6 +26,7 @@ import java.net.URI; import java.util.ArrayList; import java.util.Date; import java.util.List; +import java.util.Map; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadPoolExecutor; @@ -1186,7 +1187,7 @@ public class S3AFileSystem extends FileSystem { } ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey); - final ObjectMetadata dstom = srcom.clone(); + ObjectMetadata dstom = cloneObjectMetadata(srcom); if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) { dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm); } @@ -1292,6 +1293,73 @@ public class S3AFileSystem extends FileSystem { statistics.incrementWriteOps(1); } + /** + * Creates a copy of the passed {@link ObjectMetadata}. + * Does so without using the {@link ObjectMetadata#clone()} method, + * to avoid copying unnecessary headers. 
+   * @param source the {@link ObjectMetadata} to copy
+   * @return a copy of {@link ObjectMetadata} with only relevant attributes
+   */
+  private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
+    // This approach may be too brittle, especially if
+    // in future there are new attributes added to ObjectMetadata
+    // that we do not explicitly call to set here
+    ObjectMetadata ret = new ObjectMetadata();
+
+    // Content length is copied unconditionally (no null check)
+    ret.setContentLength(source.getContentLength());
+
+    // Possibly null attributes: each is copied only when set, because
+    // allowing nulls to pass breaks it during later use
+    if (source.getCacheControl() != null) {
+      ret.setCacheControl(source.getCacheControl());
+    }
+    if (source.getContentDisposition() != null) {
+      ret.setContentDisposition(source.getContentDisposition());
+    }
+    if (source.getContentEncoding() != null) {
+      ret.setContentEncoding(source.getContentEncoding());
+    }
+    if (source.getContentMD5() != null) {
+      ret.setContentMD5(source.getContentMD5());
+    }
+    if (source.getContentType() != null) {
+      ret.setContentType(source.getContentType());
+    }
+    if (source.getExpirationTime() != null) {
+      ret.setExpirationTime(source.getExpirationTime());
+    }
+    if (source.getExpirationTimeRuleId() != null) {
+      ret.setExpirationTimeRuleId(source.getExpirationTimeRuleId());
+    }
+    if (source.getHttpExpiresDate() != null) {
+      ret.setHttpExpiresDate(source.getHttpExpiresDate());
+    }
+    if (source.getLastModified() != null) {
+      ret.setLastModified(source.getLastModified());
+    }
+    if (source.getOngoingRestore() != null) {
+      ret.setOngoingRestore(source.getOngoingRestore());
+    }
+    if (source.getRestoreExpirationTime() != null) {
+      ret.setRestoreExpirationTime(source.getRestoreExpirationTime());
+    }
+    if (source.getSSEAlgorithm() != null) {
+      ret.setSSEAlgorithm(source.getSSEAlgorithm());
+    }
+    if (source.getSSECustomerAlgorithm() != null) {
+      ret.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm());
+    }
+    if (source.getSSECustomerKeyMd5() != null) {
+      ret.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5());
+    }
+    // User metadata: typed entries, so key and value are passed as Strings
+    for (Map.Entry<String, String> e : source.getUserMetadata().entrySet()) {
+      ret.addUserMetadata(e.getKey(), e.getValue());
+    }
+    return ret;
+  }
+
 /** * Return the number of bytes that large input files should be optimally * be split into to minimize i/o time. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 2d1c02958b1..6c3f31e84c0 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -420,6 +420,13 @@ which pass in authentication details to the test runner These are both Hadoop XML configuration files, which must be placed into `hadoop-tools/hadoop-aws/src/test/resources`. +### `core-site.xml` + +This file pre-exists and sources the configurations created +under `auth-keys.xml`. + +For most purposes you will not need to edit this file unless you +need to apply a specific, non-default property change during the tests. ### `auth-keys.xml`