HADOOP-11687. Ignore x-* and response headers when copying an Amazon S3 object. Contributed by Aaron Peterson and harsh.

This commit is contained in:
Harsh J 2016-04-01 14:18:10 +05:30
parent 3488c4f2c9
commit 256c82fe29
2 changed files with 76 additions and 1 deletions

View File

@ -26,6 +26,7 @@ import java.net.URI;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
@ -1128,7 +1129,7 @@ public class S3AFileSystem extends FileSystem {
}
ObjectMetadata srcom = s3.getObjectMetadata(bucket, srcKey);
final ObjectMetadata dstom = srcom.clone();
ObjectMetadata dstom = cloneObjectMetadata(srcom);
if (StringUtils.isNotBlank(serverSideEncryptionAlgorithm)) {
dstom.setSSEAlgorithm(serverSideEncryptionAlgorithm);
}
@ -1234,6 +1235,73 @@ public class S3AFileSystem extends FileSystem {
statistics.incrementWriteOps(1);
}
/**
* Creates a copy of the passed {@link ObjectMetadata}.
* Does so without using the {@link ObjectMetadata#clone()} method,
* to avoid copying unnecessary headers.
* @param source the {@link ObjectMetadata} to copy
* @return a copy of {@link ObjectMetadata} with only relevant attributes
*/
private ObjectMetadata cloneObjectMetadata(ObjectMetadata source) {
// This approach may be too brittle, especially if
// in future there are new attributes added to ObjectMetadata
// that we do not explicitly call to set here
ObjectMetadata ret = new ObjectMetadata();
// Non null attributes
ret.setContentLength(source.getContentLength());
// Possibly null attributes
// Allowing nulls to pass breaks it during later use
if (source.getCacheControl() != null) {
ret.setCacheControl(source.getCacheControl());
}
if (source.getContentDisposition() != null) {
ret.setContentDisposition(source.getContentDisposition());
}
if (source.getContentEncoding() != null) {
ret.setContentEncoding(source.getContentEncoding());
}
if (source.getContentMD5() != null) {
ret.setContentMD5(source.getContentMD5());
}
if (source.getContentType() != null) {
ret.setContentType(source.getContentType());
}
if (source.getExpirationTime() != null) {
ret.setExpirationTime(source.getExpirationTime());
}
if (source.getExpirationTimeRuleId() != null) {
ret.setExpirationTimeRuleId(source.getExpirationTimeRuleId());
}
if (source.getHttpExpiresDate() != null) {
ret.setHttpExpiresDate(source.getHttpExpiresDate());
}
if (source.getLastModified() != null) {
ret.setLastModified(source.getLastModified());
}
if (source.getOngoingRestore() != null) {
ret.setOngoingRestore(source.getOngoingRestore());
}
if (source.getRestoreExpirationTime() != null) {
ret.setRestoreExpirationTime(source.getRestoreExpirationTime());
}
if (source.getSSEAlgorithm() != null) {
ret.setSSEAlgorithm(source.getSSEAlgorithm());
}
if (source.getSSECustomerAlgorithm() != null) {
ret.setSSECustomerAlgorithm(source.getSSECustomerAlgorithm());
}
if (source.getSSECustomerKeyMd5() != null) {
ret.setSSECustomerKeyMd5(source.getSSECustomerKeyMd5());
}
for (Map.Entry<String, String> e : source.getUserMetadata().entrySet()) {
ret.addUserMetadata(e.getKey(), e.getValue());
}
return ret;
}
/**
* Return the number of bytes that large input files should be optimally
* be split into to minimize i/o time.

View File

@ -417,6 +417,13 @@ which pass in authentication details to the test runner
These are both Hadoop XML configuration files, which must be placed into
`hadoop-tools/hadoop-aws/src/test/resources`.
### `core-site.xml`
This file pre-exists and sources the configurations created
under `auth-keys.xml`.
For most purposes you will not need to edit this file unless you
need to apply a specific, non-default property change during the tests.
### `auth-keys.xml`