YARN-4041. Slow delegation token renewal can severely prolong RM recovery. Contributed by Sunil G

(cherry picked from commit d3a34a4f38)

Conflicts:

	hadoop-yarn-project/CHANGES.txt
This commit is contained in:
Jason Lowe 2015-10-23 20:59:28 +00:00
parent 49a7d70f53
commit 2b6ce20e1c
4 changed files with 86 additions and 24 deletions

View File

@ -91,6 +91,9 @@ Release 2.7.2 - UNRELEASED
YARN-4000. RM crashes with NPE if leaf queue becomes parent queue during restart. YARN-4000. RM crashes with NPE if leaf queue becomes parent queue during restart.
(Varun Saxena via jianhe) (Varun Saxena via jianhe)
YARN-4041. Slow delegation token renewal can severely prolong RM recovery
(Sunil G via jlowe)
Release 2.7.1 - 2015-07-06 Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -854,14 +854,16 @@ public class RMAppImpl implements RMApp, Recoverable {
} }
if (UserGroupInformation.isSecurityEnabled()) { if (UserGroupInformation.isSecurityEnabled()) {
// synchronously renew delegation token on recovery. // asynchronously renew delegation token on recovery.
try { try {
app.rmContext.getDelegationTokenRenewer().addApplicationSync( app.rmContext.getDelegationTokenRenewer()
app.getApplicationId(), app.parseCredentials(), .addApplicationAsyncDuringRecovery(app.getApplicationId(),
app.submissionContext.getCancelTokensWhenComplete(), app.getUser()); app.parseCredentials(),
app.submissionContext.getCancelTokensWhenComplete(),
app.getUser());
} catch (Exception e) { } catch (Exception e) {
String msg = "Failed to renew token for " + app.applicationId String msg = "Failed to fetch user credentials from application:"
+ " on recovery : " + e.getMessage(); + e.getMessage();
app.diagnostics.append(msg); app.diagnostics.append(msg);
LOG.error(msg, e); LOG.error(msg, e);
} }

View File

@ -385,6 +385,25 @@ public class DelegationTokenRenewer extends AbstractService {
applicationId, ts, shouldCancelAtEnd, user)); applicationId, ts, shouldCancelAtEnd, user));
} }
/**
 * Asynchronously add the tokens of a recovered application for renewal.
 *
 * Wraps the arguments in a {@link DelegationTokenRenewerAppRecoverEvent}
 * and hands that event to {@code processDelegationTokenRenewerEvent}.
 *
 * @param applicationId
 *          added application
 * @param ts
 *          tokens
 * @param shouldCancelAtEnd
 *          true if tokens should be canceled when the app is done else false.
 * @param user
 *          user
 */
public void addApplicationAsyncDuringRecovery(ApplicationId applicationId,
    Credentials ts, boolean shouldCancelAtEnd, String user) {
  DelegationTokenRenewerAppRecoverEvent recoverEvent =
      new DelegationTokenRenewerAppRecoverEvent(applicationId, ts,
          shouldCancelAtEnd, user);
  processDelegationTokenRenewerEvent(recoverEvent);
}
/** /**
* Synchronously renew delegation tokens. * Synchronously renew delegation tokens.
* @param user user * @param user user
@ -396,7 +415,7 @@ public class DelegationTokenRenewer extends AbstractService {
applicationId, ts, shouldCancelAtEnd, user)); applicationId, ts, shouldCancelAtEnd, user));
} }
private void handleAppSubmitEvent(DelegationTokenRenewerAppSubmitEvent evt) private void handleAppSubmitEvent(AbstractDelegationTokenRenewerAppEvent evt)
throws IOException, InterruptedException { throws IOException, InterruptedException {
ApplicationId applicationId = evt.getApplicationId(); ApplicationId applicationId = evt.getApplicationId();
Credentials ts = evt.getCredentials(); Credentials ts = evt.getCredentials();
@ -825,6 +844,10 @@ public class DelegationTokenRenewer extends AbstractService {
DelegationTokenRenewerAppSubmitEvent appSubmitEvt = DelegationTokenRenewerAppSubmitEvent appSubmitEvt =
(DelegationTokenRenewerAppSubmitEvent) evt; (DelegationTokenRenewerAppSubmitEvent) evt;
handleDTRenewerAppSubmitEvent(appSubmitEvt); handleDTRenewerAppSubmitEvent(appSubmitEvt);
} else if (evt instanceof DelegationTokenRenewerAppRecoverEvent) {
DelegationTokenRenewerAppRecoverEvent appRecoverEvt =
(DelegationTokenRenewerAppRecoverEvent) evt;
handleDTRenewerAppRecoverEvent(appRecoverEvt);
} else if (evt.getType().equals( } else if (evt.getType().equals(
DelegationTokenRenewerEventType.FINISH_APPLICATION)) { DelegationTokenRenewerEventType.FINISH_APPLICATION)) {
DelegationTokenRenewer.this.handleAppFinishEvent(evt); DelegationTokenRenewer.this.handleAppFinishEvent(evt);
@ -860,16 +883,49 @@ public class DelegationTokenRenewer extends AbstractService {
} }
} }
static class DelegationTokenRenewerAppSubmitEvent extends @SuppressWarnings("unchecked")
/**
 * Handles a recover event by running the same token setup that is used for
 * a submitted application. Any failure is logged as a warning and is not
 * propagated to the caller.
 *
 * @param event
 *          recover event carrying the application id, credentials,
 *          cancel-at-end flag and user
 */
private void handleDTRenewerAppRecoverEvent(
    DelegationTokenRenewerAppRecoverEvent event) {
  try {
    // Setup tokens for renewal during recovery
    DelegationTokenRenewer.this.handleAppSubmitEvent(event);
  } catch (Throwable t) {
    // handleAppSubmitEvent declares InterruptedException; catching Throwable
    // would otherwise silently drop the interrupt, so re-assert it for
    // whoever owns this thread.
    if (t instanceof InterruptedException) {
      Thread.currentThread().interrupt();
    }
    LOG.warn(
        "Unable to add the application to the delegation token renewer.", t);
  }
}
/**
 * Application event tagged with
 * {@link DelegationTokenRenewerEventType#VERIFY_AND_START_APPLICATION}.
 * All state is held by {@link AbstractDelegationTokenRenewerAppEvent}.
 */
static class DelegationTokenRenewerAppSubmitEvent
    extends AbstractDelegationTokenRenewerAppEvent {

  /**
   * @param appId
   *          added application
   * @param credentails
   *          tokens
   * @param shouldCancelAtEnd
   *          true if tokens should be canceled when the app is done else
   *          false.
   * @param user
   *          user
   */
  public DelegationTokenRenewerAppSubmitEvent(ApplicationId appId,
      Credentials credentails, boolean shouldCancelAtEnd, String user) {
    super(appId, credentails, shouldCancelAtEnd, user,
        DelegationTokenRenewerEventType.VERIFY_AND_START_APPLICATION);
  }
}
/**
 * Application event tagged with
 * {@link DelegationTokenRenewerEventType#RECOVER_APPLICATION}.
 * All state is held by {@link AbstractDelegationTokenRenewerAppEvent}.
 */
static class DelegationTokenRenewerAppRecoverEvent
    extends AbstractDelegationTokenRenewerAppEvent {

  /**
   * @param appId
   *          added application
   * @param credentails
   *          tokens
   * @param shouldCancelAtEnd
   *          true if tokens should be canceled when the app is done else
   *          false.
   * @param user
   *          user
   */
  public DelegationTokenRenewerAppRecoverEvent(ApplicationId appId,
      Credentials credentails, boolean shouldCancelAtEnd, String user) {
    super(appId, credentails, shouldCancelAtEnd, user,
        DelegationTokenRenewerEventType.RECOVER_APPLICATION);
  }
}
static class AbstractDelegationTokenRenewerAppEvent extends
DelegationTokenRenewerEvent { DelegationTokenRenewerEvent {
private Credentials credentials; private Credentials credentials;
private boolean shouldCancelAtEnd; private boolean shouldCancelAtEnd;
private String user; private String user;
public DelegationTokenRenewerAppSubmitEvent(ApplicationId appId, public AbstractDelegationTokenRenewerAppEvent(ApplicationId appId,
Credentials credentails, boolean shouldCancelAtEnd, String user) { Credentials credentails, boolean shouldCancelAtEnd, String user,
super(appId, DelegationTokenRenewerEventType.VERIFY_AND_START_APPLICATION); DelegationTokenRenewerEventType type) {
super(appId, type);
this.credentials = credentails; this.credentials = credentails;
this.shouldCancelAtEnd = shouldCancelAtEnd; this.shouldCancelAtEnd = shouldCancelAtEnd;
this.user = user; this.user = user;
@ -890,6 +946,7 @@ public class DelegationTokenRenewer extends AbstractService {
enum DelegationTokenRenewerEventType { enum DelegationTokenRenewerEventType {
VERIFY_AND_START_APPLICATION, VERIFY_AND_START_APPLICATION,
RECOVER_APPLICATION,
FINISH_APPLICATION FINISH_APPLICATION
} }

View File

@ -1166,24 +1166,24 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase {
// Need to wait for a while as now token renewal happens on another thread // Need to wait for a while as now token renewal happens on another thread
// and is asynchronous in nature. // and is asynchronous in nature.
waitForTokensToBeRenewed(rm2); waitForTokensToBeRenewed(rm2, tokenSet);
// verify tokens are properly populated back to rm2 DelegationTokenRenewer // verify tokens are properly populated back to rm2 DelegationTokenRenewer
Assert.assertEquals(tokenSet, rm2.getRMContext() Assert.assertEquals(tokenSet, rm2.getRMContext()
.getDelegationTokenRenewer().getDelegationTokens()); .getDelegationTokenRenewer().getDelegationTokens());
} }
private void waitForTokensToBeRenewed(MockRM rm2) throws Exception { private void waitForTokensToBeRenewed(MockRM rm2,
int waitCnt = 20; HashSet<Token<RMDelegationTokenIdentifier>> tokenSet) throws Exception {
boolean atleastOneAppInNEWState = true; // Max wait time to get the token renewal can be kept as 1sec (100 * 10ms)
while (waitCnt-- > 0 && atleastOneAppInNEWState) { int waitCnt = 100;
atleastOneAppInNEWState = false; while (waitCnt-- > 0) {
for (RMApp rmApp : rm2.getRMContext().getRMApps().values()) { if (tokenSet.equals(rm2.getRMContext().getDelegationTokenRenewer()
if (rmApp.getState() == RMAppState.NEW) { .getDelegationTokens())) {
Thread.sleep(1000); // Stop waiting as tokens are populated to DelegationTokenRenewer.
atleastOneAppInNEWState = true;
break; break;
} } else {
Thread.sleep(10);
} }
} }
} }