YARN-3646. Applications are getting stuck sometimes in case of retry policy
forever. Contributed by Raju Bairishetti.

(cherry picked from commit 0305316d69)
This commit is contained in:
Devaraj K 2015-05-21 20:14:44 +05:30
parent 6d7e7ef1c4
commit 157ecb2241
3 changed files with 43 additions and 7 deletions

View File

@ -496,6 +496,9 @@ Release 2.7.1 - UNRELEASED
YARN-3694. Fix dead link for TimelineServer REST API.
(Jagadesh Kiran N via aajisaka)
YARN-3646. Applications are getting stuck some times in case of retry
policy forever. (Raju Bairishetti via devaraj)
Release 2.7.0 - 2015-04-20
INCOMPATIBLE CHANGES

View File

@ -1265,4 +1265,36 @@ public class TestYarnClient {
ReservationSystemTestUtil.reservationQ); ReservationSystemTestUtil.reservationQ);
return request; return request;
} }
/**
 * Checks that a client call failing with a non-network error is reported to
 * the caller instead of being retried indefinitely.
 *
 * <p>The test starts an in-process ResourceManager, connects a YarnClient to
 * it and asks for the report of an application id that was never submitted.
 * The call is expected to fail with {@link ApplicationNotFoundException}; the
 * 30 second timeout fails the test if the call hangs instead.
 *
 * @throws Exception the expected ApplicationNotFoundException, or any
 *         unexpected failure while starting the services
 */
@Test(timeout = 30000, expected = ApplicationNotFoundException.class)
public void testShouldNotRetryForeverForNonNetworkExceptions() throws Exception {
  YarnConfiguration conf = new YarnConfiguration();
  // NOTE(review): -1 presumably selects the "wait forever" connection policy
  // in RMProxy -- confirm against RMProxy.createRetryPolicy.
  conf.setInt(YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS, -1);

  ResourceManager rm = null;
  YarnClient yarnClient = null;
  try {
    // Start the RM.
    rm = new ResourceManager();
    rm.init(conf);
    rm.start();

    yarnClient = YarnClient.createYarnClient();
    yarnClient.init(conf);
    yarnClient.start();

    // Build an id for an application that was never submitted to this RM.
    ApplicationId appId = ApplicationId.newInstance(1430126768L, 10645);

    // The RM should reject the lookup with ApplicationNotFoundException.
    yarnClient.getApplicationReport(appId);
  } finally {
    // Nested try/finally: the RM must be stopped even when stopping the
    // client throws, otherwise the in-process RM leaks.
    try {
      if (yarnClient != null) {
        yarnClient.stop();
      }
    } finally {
      if (rm != null) {
        rm.stop();
      }
    }
  }
}
} }

View File

@ -224,19 +224,20 @@ public class RMProxy<T> {
failoverSleepBaseMs, failoverSleepMaxMs); failoverSleepBaseMs, failoverSleepMaxMs);
} }
if (waitForEver) {
return RetryPolicies.RETRY_FOREVER;
}
if (rmConnectionRetryIntervalMS < 0) { if (rmConnectionRetryIntervalMS < 0) {
throw new YarnRuntimeException("Invalid Configuration. " + throw new YarnRuntimeException("Invalid Configuration. " +
YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS + YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS +
" should not be negative."); " should not be negative.");
} }
RetryPolicy retryPolicy = RetryPolicy retryPolicy = null;
RetryPolicies.retryUpToMaximumTimeWithFixedSleep(rmConnectWaitMS, if (waitForEver) {
rmConnectionRetryIntervalMS, TimeUnit.MILLISECONDS); retryPolicy = RetryPolicies.RETRY_FOREVER;
} else {
retryPolicy =
RetryPolicies.retryUpToMaximumTimeWithFixedSleep(rmConnectWaitMS,
rmConnectionRetryIntervalMS, TimeUnit.MILLISECONDS);
}
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap = Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap =
new HashMap<Class<? extends Exception>, RetryPolicy>(); new HashMap<Class<? extends Exception>, RetryPolicy>();