HADOOP-13873. log DNS addresses on s3a initialization.
Contributed by Mukund Thakur. If you set the log level of org.apache.hadoop.fs.s3a.impl.NetworkBinding to DEBUG, then when the S3A bucket probe is made, the DNS address of the S3 endpoint is calculated and printed. This is useful to see if a large set of processes are all using the same IP address from the pool of load balancers to which AWS directs clients when an AWS S3 endpoint is resolved. This can have implications for performance: if all clients access the same load balancer, performance may be suboptimal. Note: if bucket probes are disabled (fs.s3a.bucket.probe = 0), the DNS logging does not take place. Change-Id: I21b3ac429dc0b543f03e357fdeb94c2d2a328dd8
This commit is contained in:
parent
37d6582223
commit
56350664a7
|
@ -163,6 +163,12 @@ public final class Constants {
|
|||
//use a custom endpoint?
|
||||
public static final String ENDPOINT = "fs.s3a.endpoint";
|
||||
|
||||
/**
|
||||
* Default value of s3 endpoint. If not set explicitly using
|
||||
* {@code AmazonS3#setEndpoint()}, this is used.
|
||||
*/
|
||||
public static final String DEFAULT_ENDPOINT = "s3.amazonaws.com";
|
||||
|
||||
//Enable path style access? Overrides default virtual hosting
|
||||
public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";
|
||||
|
||||
|
|
|
@ -176,6 +176,7 @@ import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIg
|
|||
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
|
||||
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
|
||||
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
|
||||
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup;
|
||||
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
|
||||
|
||||
/**
|
||||
|
@ -469,6 +470,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
|
|||
* S3AFileSystem initialization. When set to 1 or 2, bucket existence check
|
||||
* will be performed which is potentially slow.
|
||||
* If 3 or higher: warn and use the v2 check.
|
||||
* The DNS address of the s3 endpoint is also logged (at debug level) if the
|
||||
* bucket probe value is greater than 0; otherwise it is skipped for performance.
|
||||
* @throws UnknownStoreException the bucket is absent
|
||||
* @throws IOException any other problem talking to S3
|
||||
*/
|
||||
|
@ -483,9 +486,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
|
|||
LOG.debug("skipping check for bucket existence");
|
||||
break;
|
||||
case 1:
|
||||
logDnsLookup(getConf());
|
||||
verifyBucketExists();
|
||||
break;
|
||||
case 2:
|
||||
logDnsLookup(getConf());
|
||||
verifyBucketExistsV2();
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -21,6 +21,8 @@ package org.apache.hadoop.fs.s3a.impl;
|
|||
import java.io.IOException;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
import javax.net.ssl.HostnameVerifier;
|
||||
import javax.net.ssl.SSLSocketFactory;
|
||||
|
@ -30,9 +32,12 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.net.NetUtils;
|
||||
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
|
||||
|
||||
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SSL_CHANNEL_MODE;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
|
||||
import static org.apache.hadoop.fs.s3a.Constants.SSL_CHANNEL_MODE;
|
||||
|
||||
/**
|
||||
|
@ -121,4 +126,30 @@ public class NetworkBinding {
|
|||
? "us-east-1"
|
||||
: region;
|
||||
}
|
||||
|
||||
/**
|
||||
* Log the dns address associated with s3 endpoint. If endpoint is
|
||||
* not set in the configuration, the {@code Constants#DEFAULT_ENDPOINT}
|
||||
* will be used.
|
||||
* @param conf input configuration.
|
||||
*/
|
||||
public static void logDnsLookup(Configuration conf) {
|
||||
String endPoint = conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT);
|
||||
String hostName = endPoint;
|
||||
if (!endPoint.isEmpty() && LOG.isDebugEnabled()) {
|
||||
// Updating the hostname if there is a scheme present.
|
||||
if (endPoint.contains("://")) {
|
||||
try {
|
||||
URI uri = new URI(endPoint);
|
||||
hostName = uri.getHost();
|
||||
} catch (URISyntaxException e) {
|
||||
LOG.debug("Got URISyntaxException, ignoring");
|
||||
}
|
||||
}
|
||||
LOG.debug("Bucket endpoint : {}, Hostname : {}, DNSAddress : {}",
|
||||
endPoint,
|
||||
hostName,
|
||||
NetUtils.normalizeHostName(hostName));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue