HADOOP-11687. Ignore x-* and response headers when copying an Amazon S3 object. Contributed by Aaron Peterson and harsh.

(cherry picked from commit 256c82fe29)
This commit is contained in:
Harsh J 2016-04-01 14:18:10 +05:30
parent 10d8f8a39c
commit 7c5b55d4e5
2 changed files with 76 additions and 1 deletions

View File

@ -26,6 +26,7 @@ import java.net.URI;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Date; import java.util.Date;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.ThreadPoolExecutor;
@ -1186,7 +1187,7 @@ public class S3AFileSystem extends FileSystem {
} }
ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey); ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey);
final ObjectMetadata dstom = srcom.clone(); ObjectMetadata dstom = cloneObjectMetadata(srcom);
if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) { if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm); dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm);
} }
@ -1292,6 +1293,73 @@ public class S3AFileSystem extends FileSystem {
statistics.incrementWriteOps(1); statistics.incrementWriteOps(1);
} }
/**
 * Builds a fresh {@link ObjectMetadata} populated from the given one.
 * Attributes are transferred individually instead of going through
 * {@link ObjectMetadata#clone()}, so that headers which are not needed
 * are left behind.
 * @param source the {@link ObjectMetadata} to copy
 * @return a new {@link ObjectMetadata} holding only the attributes set here
 */
private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
  // NOTE: the attribute list below is maintained by hand. Any attribute
  // added to ObjectMetadata in future that is not explicitly set here
  // will not be carried over to the copy.
  final ObjectMetadata copy = new ObjectMetadata();

  // Content length is always present.
  copy.setContentLength(source.getContentLength());

  // Everything from here on may be null on the source. A null is never
  // handed to a setter, because letting nulls through breaks later use
  // of the metadata.
  final String cacheControl = source.getCacheControl();
  if (cacheControl != null) {
    copy.setCacheControl(cacheControl);
  }

  final String contentDisposition = source.getContentDisposition();
  if (contentDisposition != null) {
    copy.setContentDisposition(contentDisposition);
  }

  final String contentEncoding = source.getContentEncoding();
  if (contentEncoding != null) {
    copy.setContentEncoding(contentEncoding);
  }

  final String contentMd5 = source.getContentMD5();
  if (contentMd5 != null) {
    copy.setContentMD5(contentMd5);
  }

  final String contentType = source.getContentType();
  if (contentType != null) {
    copy.setContentType(contentType);
  }

  final Date expirationTime = source.getExpirationTime();
  if (expirationTime != null) {
    copy.setExpirationTime(expirationTime);
  }

  final String expirationRuleId = source.getExpirationTimeRuleId();
  if (expirationRuleId != null) {
    copy.setExpirationTimeRuleId(expirationRuleId);
  }

  final Date httpExpires = source.getHttpExpiresDate();
  if (httpExpires != null) {
    copy.setHttpExpiresDate(httpExpires);
  }

  final Date lastModified = source.getLastModified();
  if (lastModified != null) {
    copy.setLastModified(lastModified);
  }

  final Boolean ongoingRestore = source.getOngoingRestore();
  if (ongoingRestore != null) {
    copy.setOngoingRestore(ongoingRestore);
  }

  final Date restoreExpiration = source.getRestoreExpirationTime();
  if (restoreExpiration != null) {
    copy.setRestoreExpirationTime(restoreExpiration);
  }

  final String sseAlgorithm = source.getSSEAlgorithm();
  if (sseAlgorithm != null) {
    copy.setSSEAlgorithm(sseAlgorithm);
  }

  final String sseCustomerAlgorithm = source.getSSECustomerAlgorithm();
  if (sseCustomerAlgorithm != null) {
    copy.setSSECustomerAlgorithm(sseCustomerAlgorithm);
  }

  final String sseCustomerKeyMd5 = source.getSSECustomerKeyMd5();
  if (sseCustomerKeyMd5 != null) {
    copy.setSSECustomerKeyMd5(sseCustomerKeyMd5);
  }

  // User-defined metadata is copied entry by entry into the new object.
  final Map<String, String> userMetadata = source.getUserMetadata();
  for (Map.Entry<String, String> entry : userMetadata.entrySet()) {
    copy.addUserMetadata(entry.getKey(), entry.getValue());
  }

  return copy;
}
/** /**
* Return the number of bytes that large input files should be optimally * Return the number of bytes that large input files should be optimally
* be split into to minimize i/o time. * be split into to minimize i/o time.

View File

@ -420,6 +420,13 @@ which pass in authentication details to the test runner
These are both Hadoop XML configuration files, which must be placed into These are both Hadoop XML configuration files, which must be placed into
`hadoop-tools/hadoop-aws/src/test/resources`. `hadoop-tools/hadoop-aws/src/test/resources`.
### `core-site.xml`
This file already exists and pulls in the configuration defined in
`auth-keys.xml`.
In most cases you will not need to edit this file; change it only when a
test run needs a specific, non-default property value.
### `auth-keys.xml` ### `auth-keys.xml`