diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 52485c8fe8a..b32d58baaa4 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -373,6 +373,9 @@ Release 2.6.0 - UNRELEASED
YARN-2676. Enhanced Timeline auth-filter to support proxy users. (Zhijie Shen
via vinodkv)
+ YARN-2673. Made timeline client put APIs retry if ConnectException happens.
+ (Li Lu via zjshen)
+
OPTIMIZATIONS
BUG FIXES
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
index a81c1a62a91..992d3ea7fc5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java
@@ -1317,6 +1317,23 @@ public class YarnConfiguration extends Configuration {
public static final boolean
TIMELINE_SERVICE_HTTP_CROSS_ORIGIN_ENABLED_DEFAULT = false;
+ /** Prefix shared by all timeline client configuration keys */
+ public static final String TIMELINE_SERVICE_CLIENT_PREFIX =
+ TIMELINE_SERVICE_PREFIX + "client.";
+
+ /**
+ * Timeline client call, max retries. A negative value (e.g. -1) means
+ * no limit: the client keeps retrying.
+ */
+ public static final String TIMELINE_SERVICE_CLIENT_MAX_RETRIES =
+ TIMELINE_SERVICE_CLIENT_PREFIX + "max-retries";
+
+ /** Default retry limit for a timeline client call */
+ public static final int DEFAULT_TIMELINE_SERVICE_CLIENT_MAX_RETRIES = 30;
+
+ /** Timeline client call, interval between two retries in milliseconds */
+ public static final String TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS =
+ TIMELINE_SERVICE_CLIENT_PREFIX + "retry-interval-ms";
+
+ /** Default interval between two timeline client retries, in milliseconds */
+ public static final long
+ DEFAULT_TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS = 1000;
+
+
// ///////////////////////////////
// Shared Cache Configs
// ///////////////////////////////
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java
index 1b863d54379..a2efbc609ea 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.client.api.impl;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;
+import java.net.ConnectException;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
@@ -67,7 +68,10 @@ import org.codehaus.jackson.map.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.sun.jersey.api.client.Client;
+import com.sun.jersey.api.client.filter.ClientFilter;
import com.sun.jersey.api.client.ClientResponse;
+import com.sun.jersey.api.client.ClientRequest;
+import com.sun.jersey.api.client.ClientHandlerException;
import com.sun.jersey.api.client.WebResource;
import com.sun.jersey.api.client.config.ClientConfig;
import com.sun.jersey.api.client.config.DefaultClientConfig;
@@ -103,6 +107,80 @@ public class TimelineClientImpl extends TimelineClient {
private URI resURI;
private boolean isEnabled;
+ private TimelineJerseyRetryFilter retryFilter;
+
+ /**
+  * Jersey client filter that re-sends a request for as long as the next
+  * handler in the chain fails with a {@link ClientHandlerException} whose
+  * cause is a {@link ConnectException}, up to a configurable number of
+  * retries. Any other failure is passed to the caller unchanged.
+  */
+ static class TimelineJerseyRetryFilter extends ClientFilter {
+   // maxRetries < 0 means keep trying
+   @Private
+   @VisibleForTesting
+   public int maxRetries;
+
+   // Time to sleep between two attempts, in milliseconds
+   @Private
+   @VisibleForTesting
+   public long retryInterval;
+
+   // Indicates if retries happened last time
+   @Private
+   @VisibleForTesting
+   public boolean retried = false;
+
+   /**
+    * Creates a filter whose retry limit and retry interval are read from
+    * the given configuration, falling back to the YarnConfiguration
+    * defaults when the keys are not set.
+    *
+    * @param conf configuration to read the retry settings from
+    */
+   public TimelineJerseyRetryFilter(Configuration conf) {
+     super();
+     maxRetries = conf.getInt(
+         YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES,
+         YarnConfiguration.DEFAULT_TIMELINE_SERVICE_CLIENT_MAX_RETRIES);
+     retryInterval = conf.getLong(
+         YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS,
+         YarnConfiguration.DEFAULT_TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS);
+   }
+
+   /**
+    * Passes the request down the filter chain, retrying on connection
+    * failures.
+    *
+    * @param cr the request to send
+    * @return the response of the first attempt that succeeds
+    * @throws ClientHandlerException if the failure is not caused by a
+    *         ConnectException (rethrown as is), if the retry limit is
+    *         exceeded, or if the thread is interrupted while waiting for
+    *         the next attempt
+    */
+   @Override
+   public ClientResponse handle(ClientRequest cr)
+       throws ClientHandlerException {
+     int leftRetries = maxRetries;
+     ClientHandlerException lastFailure = null;
+     retried = false;
+     // keep trying
+     while (true) {
+       try {
+         // try pass the request on, if fail, keep retrying
+         return getNext().handle(cr);
+       } catch (ClientHandlerException e) {
+         // Only connection failures are retried. Check this first so that
+         // an unrelated error is never reported as "retries limit exceeded".
+         if (!(e.getCause() instanceof ConnectException)) {
+           throw e;
+         }
+         lastFailure = e;
+       }
+       // break if there's no retries left
+       if (leftRetries == 0) {
+         break;
+       }
+       if (leftRetries > 0) {
+         LOG.info("Connection Timeout (" + cr.getURI() + "), will try "
+             + leftRetries + " more time(s).");
+         leftRetries--;
+       } else {
+         // note that maxRetries may be -1 at the very beginning
+         // maxRetries = -1 means keep trying
+         LOG.info("Connection Timeout (" + cr.getURI()
+             + "), will keep retrying.");
+       }
+       retried = true;
+       try {
+         // sleep for the given time interval
+         Thread.sleep(retryInterval);
+       } catch (InterruptedException ie) {
+         LOG.warn("Client retry sleep interrupted! ");
+         // Restore the interrupt flag and stop retrying: continuing would
+         // make every following sleep fail immediately, and with
+         // maxRetries < 0 the thread could never be stopped.
+         Thread.currentThread().interrupt();
+         throw new ClientHandlerException("Interrupted while waiting to "
+             + "retry connecting to timeline server (" + cr.getURI() + ")",
+             ie);
+       }
+     }
+     // Keep the last connection failure as the cause for diagnosis
+     throw new ClientHandlerException("Failed to connect to timeline server. "
+         + "Connection retries limit exceeded. "
+         + "The posted timeline event may be missing", lastFailure);
+   }
+ }
+
public TimelineClientImpl() {
super(TimelineClientImpl.class.getName());
}
@@ -126,6 +204,8 @@ public class TimelineClientImpl extends TimelineClient {
client = new Client(new URLConnectionClientHandler(
new TimelineURLConnectionFactory()), cc);
token = new DelegationTokenAuthenticatedURL.Token();
+ retryFilter = new TimelineJerseyRetryFilter(conf);
+ client.addFilter(retryFilter);
if (YarnConfiguration.useHttps(conf)) {
resURI = URI
@@ -228,6 +308,12 @@ public class TimelineClientImpl extends TimelineClient {
}
}
+ /** Returns the retry filter held by this client, so tests can inspect it. */
+ @Private
+ @VisibleForTesting
+ public TimelineJerseyRetryFilter getRetryFilter() {
+ return retryFilter;
+ }
+
@Private
@VisibleForTesting
public ClientResponse doPostingObject(Object object, String path) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
index bba42636673..3c3d7e3ad2c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml
@@ -1322,6 +1322,22 @@
/etc/krb5.keytab
+
+
+ Default maximum number of retries for timeline service client (-1 means no limit).
+
+ yarn.timeline-service.client.max-retries
+ 30
+
+
+
+
+ Default retry time interval, in milliseconds, for timeline service client.
+
+ yarn.timeline-service.client.retry-interval-ms
+ 1000
+
+
Whether the shared cache is enabled
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
index 13015562812..749a29397a6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java
@@ -181,6 +181,32 @@ public class TestTimelineClient {
}
}
+ @Test
+ public void testCheckRetryCount() throws Exception {
+   // Allow exactly one retry, with a 1.5 second retry interval
+   final int retryLimit = 1;
+   final long retryIntervalMs = 1500;
+   YarnConfiguration conf = new YarnConfiguration();
+   conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
+   conf.setInt(YarnConfiguration.TIMELINE_SERVICE_CLIENT_MAX_RETRIES,
+       retryLimit);
+   conf.setLong(YarnConfiguration.TIMELINE_SERVICE_CLIENT_RETRY_INTERVAL_MS,
+       retryIntervalMs);
+   TimelineClientImpl client = createTimelineClient(conf);
+   try {
+     // No timeline server is expected to be running, so the put must fail
+     client.putEntities(generateEntity());
+     Assert.fail("Exception expected!"
+         + "Timeline server should be off to run this test. ");
+   } catch (ClientHandlerException expected) {
+     String reason = expected.getMessage();
+     Assert.assertTrue(
+         "Handler exception for reason other than retry: " + reason,
+         reason.contains("Connection retries limit exceeded"));
+     // The failure is the expected one; now check that a retry happened
+     boolean didRetry = client.getRetryFilter().retried;
+     Assert.assertTrue("Retry filter didn't perform any retries! ", didRetry);
+   }
+ }
+
private static ClientResponse mockEntityClientResponse(
TimelineClientImpl client, ClientResponse.Status status,
boolean hasError, boolean hasRuntimeError) {