YARN-3646. Applications are getting stuck some times in case of retry

policy forever. Contributed by Raju Bairishetti.
This commit is contained in:
Devaraj K 2015-05-21 20:14:44 +05:30
parent a5def58087
commit 0305316d69
3 changed files with 43 additions and 7 deletions

View File

@ -538,6 +538,9 @@ Release 2.7.1 - UNRELEASED
YARN-3694. Fix dead link for TimelineServer REST API.
(Jagadesh Kiran N via aajisaka)
YARN-3646. Applications are sometimes getting stuck in case of retry
policy forever. (Raju Bairishetti via devaraj)
Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES

View File

@ -1265,4 +1265,36 @@ public class TestYarnClient {
ReservationSystemTestUtil.reservationQ);
return request;
}
/**
 * Verifies that a non-network failure is surfaced to the caller instead of
 * being retried: asking a freshly started ResourceManager for the report of
 * an application id that was never submitted must end in
 * {@link ApplicationNotFoundException} within the 30 second test timeout.
 *
 * <p>{@code RESOURCEMANAGER_CONNECT_MAX_WAIT_MS} is set to -1, which is
 * presumably the "retry forever" setting -- TODO confirm against RMProxy.
 *
 * @throws Exception the expected {@link ApplicationNotFoundException}, or
 *         any failure while starting the ResourceManager or the client
 */
@Test(timeout = 30000, expected = ApplicationNotFoundException.class)
public void testShouldNotRetryForeverForNonNetworkExceptions() throws Exception {
  YarnConfiguration conf = new YarnConfiguration();
  conf.setInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, -1);

  ResourceManager rm = null;
  YarnClient yarnClient = null;
  try {
    // The RM has to be up before the client is started against it.
    rm = new ResourceManager();
    rm.init(conf);
    rm.start();

    yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // An application id the RM has never seen.
    ApplicationId appId = ApplicationId.newInstance(1430126768L, 10645);

    // The RM should answer with ApplicationNotFoundException.
    yarnClient.getApplicationReport(appId);
  } finally {
    // Nested so that a failure while stopping the client cannot leave the
    // ResourceManager running for the tests that follow.
    try {
      if (yarnClient != null) {
        yarnClient.stop();
      }
    } finally {
      if (rm != null) {
        rm.stop();
      }
    }
  }
}
}

View File

@ -224,19 +224,20 @@ public class RMProxy<T> {
failoverSleepBaseMs, failoverSleepMaxMs);
}
if (waitForEver) {
return RetryPolicies.RETRY_FOREVER;
}
if (rmConnectionRetryIntervalMS < 0) {
throw new YarnRuntimeException("Invalid Configuration. " +
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS +
" should not be negative.");
}
RetryPolicy retryPolicy =
RetryPolicies.retryUpToMaximumTimeWithFixedSleep(rmConnectWaitMS,
rmConnectionRetryIntervalMS, TimeUnit.MILLISECONDS);
RetryPolicy retryPolicy = null;
if (waitForEver) {
retryPolicy = RetryPolicies.RETRY_FOREVER;
} else {
retryPolicy =
RetryPolicies.retryUpToMaximumTimeWithFixedSleep(rmConnectWaitMS,
rmConnectionRetryIntervalMS, TimeUnit.MILLISECONDS);
}
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
new HashMap<Class<? extends Exception>, RetryPolicy>();