YARN-4431. Not necessary to do unRegisterNM() if NM get stop due to failed to connect to RM. (Junping Du via rohithsharmaks)
(cherry picked from commit 15c3e7ffe3)
This commit is contained in:
parent
472541291b
commit
f626d18cc3
|
@ -1056,6 +1056,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
YARN-4392. ApplicationCreatedEvent event time resets after RM restart/failover.
|
YARN-4392. ApplicationCreatedEvent event time resets after RM restart/failover.
|
||||||
(Naganarasimha G R and Xuan Gong via xgong)
|
(Naganarasimha G R and Xuan Gong via xgong)
|
||||||
|
|
||||||
|
YARN-4431. Not necessary to do unRegisterNM() if NM get stop due to failed to connect
|
||||||
|
to RM. (Junping Du via rohithsharmaks)
|
||||||
|
|
||||||
Release 2.7.3 - UNRELEASED
|
Release 2.7.3 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -134,6 +134,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
|
|
||||||
private Runnable statusUpdaterRunnable;
|
private Runnable statusUpdaterRunnable;
|
||||||
private Thread statusUpdater;
|
private Thread statusUpdater;
|
||||||
|
private boolean failedToConnect = false;
|
||||||
private long rmIdentifier = ResourceManagerConstants.RM_INVALID_IDENTIFIER;
|
private long rmIdentifier = ResourceManagerConstants.RM_INVALID_IDENTIFIER;
|
||||||
private boolean registeredWithRM = false;
|
private boolean registeredWithRM = false;
|
||||||
Set<ContainerId> pendingContainersToRemove = new HashSet<ContainerId>();
|
Set<ContainerId> pendingContainersToRemove = new HashSet<ContainerId>();
|
||||||
|
@ -241,7 +242,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
// the isStopped check is for avoiding multiple unregistrations.
|
// the isStopped check is for avoiding multiple unregistrations.
|
||||||
if (this.registeredWithRM && !this.isStopped
|
if (this.registeredWithRM && !this.isStopped
|
||||||
&& !isNMUnderSupervisionWithRecoveryEnabled()
|
&& !isNMUnderSupervisionWithRecoveryEnabled()
|
||||||
&& !context.getDecommissioned()) {
|
&& !context.getDecommissioned() && !failedToConnect) {
|
||||||
unRegisterNM();
|
unRegisterNM();
|
||||||
}
|
}
|
||||||
// Interrupt the updater.
|
// Interrupt the updater.
|
||||||
|
@ -823,6 +824,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
|
||||||
//catch and throw the exception if tried MAX wait time to connect RM
|
//catch and throw the exception if tried MAX wait time to connect RM
|
||||||
dispatcher.getEventHandler().handle(
|
dispatcher.getEventHandler().handle(
|
||||||
new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
|
new NodeManagerEvent(NodeManagerEventType.SHUTDOWN));
|
||||||
|
// failed to connect to RM.
|
||||||
|
failedToConnect = true;
|
||||||
throw new YarnRuntimeException(e);
|
throw new YarnRuntimeException(e);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue