HADOOP-13873. log DNS addresses on s3a initialization.

Contributed by Mukund Thakur.

If you set the log org.apache.hadoop.fs.s3a.impl.NetworkBinding
to DEBUG, then when the S3A bucket probe is made, the DNS address
of the S3 endpoint is resolved and printed.

This is useful to see if a large set of processes are all using
the same IP address from the pool of load balancers to which AWS
directs clients when an AWS S3 endpoint is resolved.

This can have implications for performance: if all clients
access the same load balancer, performance may be suboptimal.

Note: if bucket probes are disabled, fs.s3a.bucket.probe = 0,
the DNS logging does not take place.

Change-Id: I21b3ac429dc0b543f03e357fdeb94c2d2a328dd8
This commit is contained in:
Mukund Thakur 2020-04-17 14:15:38 +01:00 committed by Steve Loughran
parent 37d6582223
commit 56350664a7
No known key found for this signature in database
GPG Key ID: D22CF846DBB162A0
3 changed files with 42 additions and 0 deletions

View File

@ -163,6 +163,12 @@ public final class Constants {
//use a custom endpoint?
public static final String ENDPOINT = "fs.s3a.endpoint";
/**
* Default value of s3 endpoint. If not set explicitly using
* {@code AmazonS3#setEndpoint()}, this is used.
*/
public static final String DEFAULT_ENDPOINT = "s3.amazonaws.com";
//Enable path style access? Overrides default virtual hosting
public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";

View File

@ -176,6 +176,7 @@ import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIg
import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
/**
@ -469,6 +470,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
* S3AFileSystem initialization. When set to 1 or 2, bucket existence check
* will be performed which is potentially slow.
* If 3 or higher: warn and use the v2 check.
* The DNS address of the S3 endpoint is also logged if the bucket probe
* value is greater than 0; otherwise it is skipped for increased performance.
* @throws UnknownStoreException the bucket is absent
* @throws IOException any other problem talking to S3
*/
@ -483,9 +486,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
LOG.debug("skipping check for bucket existence");
break;
case 1:
logDnsLookup(getConf());
verifyBucketExists();
break;
case 2:
logDnsLookup(getConf());
verifyBucketExistsV2();
break;
default:

View File

@ -21,6 +21,8 @@ package org.apache.hadoop.fs.s3a.impl;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.SSLSocketFactory;
@ -30,9 +32,12 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SSL_CHANNEL_MODE;
import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.SSL_CHANNEL_MODE;
/**
@ -121,4 +126,30 @@ public class NetworkBinding {
? "us-east-1"
: region;
}
/**
 * Log the DNS address associated with the S3 endpoint. If the endpoint is
 * not set in the configuration, the {@code Constants#DEFAULT_ENDPOINT}
 * will be used.
 * <p>
 * Nothing is read, resolved or logged unless debug logging is enabled
 * for this class; an empty endpoint is also skipped.
 * If the endpoint contains a scheme, the host part of the URI is used for
 * the lookup; if no host can be extracted from it, the endpoint string
 * itself is used instead.
 * @param conf input configuration.
 */
public static void logDnsLookup(Configuration conf) {
  if (!LOG.isDebugEnabled()) {
    // purely diagnostic: skip the configuration read and the DNS lookup
    return;
  }
  String endPoint = conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT);
  if (endPoint.isEmpty()) {
    return;
  }
  String hostName = endPoint;
  // Updating the hostname if there is a scheme present.
  if (endPoint.contains("://")) {
    try {
      String parsedHost = new URI(endPoint).getHost();
      if (parsedHost != null) {
        hostName = parsedHost;
      } else {
        // URI.getHost() returns null when the URI has no parseable host;
        // keep the raw endpoint rather than handing null to the resolver.
        LOG.debug("No hostname found in endpoint {}; using it unchanged",
            endPoint);
      }
    } catch (URISyntaxException e) {
      // best-effort diagnostics: fall back to the raw endpoint string
      LOG.debug("Failed to parse endpoint {} as a URI; using it unchanged",
          endPoint, e);
    }
  }
  LOG.debug("Bucket endpoint : {}, Hostname : {}, DNSAddress : {}",
      endPoint,
      hostName,
      NetUtils.normalizeHostName(hostName));
}
}