diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
index 0ca4aa01a7e..430a6bc325a 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java
@@ -163,6 +163,12 @@ public final class Constants {
   //use a custom endpoint?
   public static final String ENDPOINT = "fs.s3a.endpoint";
 
+  /**
+   * Default value of s3 endpoint. If not set explicitly using
+   * {@code AmazonS3#setEndpoint()}, this is used.
+   */
+  public static final String DEFAULT_ENDPOINT = "s3.amazonaws.com";
+
   //Enable path style access? Overrides default virtual hosting
   public static final String PATH_STYLE_ACCESS = "fs.s3a.path.style.access";
 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index 9630a9eff74..6d2b3a84ca7 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -176,6 +176,7 @@ import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletionIg
 import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket;
 import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404;
 import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.fixBucketRegion;
+import static org.apache.hadoop.fs.s3a.impl.NetworkBinding.logDnsLookup;
 import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
 
 /**
@@ -469,6 +470,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
    * S3AFileSystem initialization. When set to 1 or 2, bucket existence check
    * will be performed which is potentially slow.
    * If 3 or higher: warn and use the v2 check.
+   * Also logging DNS address of the s3 endpoint if the bucket probe value is
+   * greater than 0 else skipping it for increased performance.
    * @throws UnknownStoreException the bucket is absent
    * @throws IOException any other problem talking to S3
    */
@@ -483,9 +486,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
       LOG.debug("skipping check for bucket existence");
       break;
     case 1:
+      logDnsLookup(getConf());
       verifyBucketExists();
       break;
     case 2:
+      logDnsLookup(getConf());
       verifyBucketExistsV2();
       break;
     default:
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
index 7ff44510011..8b34376a255 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/NetworkBinding.java
@@ -21,6 +21,8 @@ package org.apache.hadoop.fs.s3a.impl;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
+import java.net.URI;
+import java.net.URISyntaxException;
 
 import javax.net.ssl.HostnameVerifier;
 import javax.net.ssl.SSLSocketFactory;
@@ -30,9 +32,12 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory;
 
+import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SSL_CHANNEL_MODE;
+import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
 import static org.apache.hadoop.fs.s3a.Constants.SSL_CHANNEL_MODE;
 
 /**
@@ -121,4 +126,45 @@ public class NetworkBinding {
         ? "us-east-1"
         : region;
   }
+
+  /**
+   * Log the DNS address associated with the S3 endpoint. If the endpoint
+   * is not set in the configuration, {@code Constants#DEFAULT_ENDPOINT}
+   * is used. This is purely diagnostic: nothing is read or resolved
+   * unless debug logging is enabled, and an endpoint whose hostname
+   * cannot be determined is reported rather than looked up.
+   * @param conf input configuration.
+   */
+  public static void logDnsLookup(Configuration conf) {
+    if (!LOG.isDebugEnabled()) {
+      // diagnostics only: avoid the DNS query when nothing would be logged.
+      return;
+    }
+    String endPoint = conf.getTrimmed(ENDPOINT, DEFAULT_ENDPOINT);
+    if (endPoint.isEmpty()) {
+      return;
+    }
+    String hostName = endPoint;
+    // Extract the hostname if there is a scheme present.
+    if (endPoint.contains("://")) {
+      try {
+        hostName = new URI(endPoint).getHost();
+      } catch (URISyntaxException e) {
+        LOG.debug("Unable to parse endpoint {}; skipping DNS lookup",
+            endPoint, e);
+        return;
+      }
+      if (hostName == null) {
+        // URI#getHost() is null when the authority is not server-based;
+        // there is no hostname to resolve.
+        LOG.debug("Bucket endpoint : {} has no hostname; skipping DNS lookup",
+            endPoint);
+        return;
+      }
+    }
+    LOG.debug("Bucket endpoint : {}, Hostname : {}, DNSAddress : {}",
+        endPoint,
+        hostName,
+        NetUtils.normalizeHostName(hostName));
+  }
 }