YARN-4431. Not necessary to do unRegisterNM() if NM get stop due to failed to connect to RM. (Junping Du via rohithsharmaks)

(cherry picked from commit 15c3e7ffe3)
This commit is contained in:
rohithsharmaks 2015-12-09 10:50:43 +05:30
parent 472541291b
commit f626d18cc3
2 changed files with 7 additions and 1 deletions

View File

@ -1056,6 +1056,9 @@ Release 2.8.0 - UNRELEASED
YARN-4392. ApplicationCreatedEvent event time resets after RM restart/failover. YARN-4392. ApplicationCreatedEvent event time resets after RM restart/failover.
(Naganarasimha G R and Xuan Gong via xgong) (Naganarasimha G R and Xuan Gong via xgong)
YARN-4431. Not necessary to do unRegisterNM() if NM get stop due to failed to connect
to RM. (Junping Du via rohithsharmaks)
Release 2.7.3 - UNRELEASED Release 2.7.3 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -134,6 +134,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
private Runnable statusUpdaterRunnable; private Runnable statusUpdaterRunnable;
private Thread statusUpdater; private Thread statusUpdater;
private boolean failedToConnect = false;
private long rmIdentifier = ResourceManagerConstants.RM_INVALID_IDENTIFIER; private long rmIdentifier = ResourceManagerConstants.RM_INVALID_IDENTIFIER;
private boolean registeredWithRM = false; private boolean registeredWithRM = false;
Set<ContainerId> pendingContainersToRemove = new HashSet<ContainerId>(); Set<ContainerId> pendingContainersToRemove = new HashSet<ContainerId>();
@ -241,7 +242,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
// the isStopped check is for avoiding multiple unregistrations. // the isStopped check is for avoiding multiple unregistrations.
if (this.registeredWithRM && !this.isStopped if (this.registeredWithRM && !this.isStopped
&& !isNMUnderSupervisionWithRecoveryEnabled() && !isNMUnderSupervisionWithRecoveryEnabled()
&& !context.getDecommissioned()) { && !context.getDecommissioned() && !failedToConnect) {
unRegisterNM(); unRegisterNM();
} }
// Interrupt the updater. // Interrupt the updater.
@ -823,6 +824,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
//catch and throw the exception if tried MAX wait time to connect RM //catch and throw the exception if tried MAX wait time to connect RM
dispatcher.getEventHandler().handle( dispatcher.getEventHandler().handle(
new NodeManagerEvent(NodeManagerEventType.SHUTDOWN)); new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
// failed to connect to RM.
failedToConnect = true;
throw new YarnRuntimeException(e); throw new YarnRuntimeException(e);
} catch (Throwable e) { } catch (Throwable e) {