diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index ed31d08a6a2..0c920df719f 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1031,6 +1031,9 @@ Release 2.8.0 - UNRELEASED YARN-4408. Fix issue that NodeManager reports negative running containers. (Robert Kanter via junping_du) + YARN-4431. Not necessary to do unRegisterNM() if NM gets stopped due to failure to connect + to RM. (Junping Du via rohithsharmaks) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 34267b373fa..ba915c27cb8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -134,6 +134,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements private Runnable statusUpdaterRunnable; private Thread statusUpdater; + private boolean failedToConnect = false; private long rmIdentifier = ResourceManagerConstants.RM_INVALID_IDENTIFIER; private boolean registeredWithRM = false; Set pendingContainersToRemove = new HashSet(); @@ -241,7 +242,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements // the isStopped check is for avoiding multiple unregistrations. 
if (this.registeredWithRM && !this.isStopped && !isNMUnderSupervisionWithRecoveryEnabled() - && !context.getDecommissioned()) { + && !context.getDecommissioned() && !failedToConnect) { unRegisterNM(); } // Interrupt the updater. @@ -823,6 +824,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements //catch and throw the exception if tried MAX wait time to connect RM dispatcher.getEventHandler().handle( new NodeManagerEvent(NodeManagerEventType.SHUTDOWN)); + // failed to connect to RM. + failedToConnect = true; throw new YarnRuntimeException(e); } catch (Throwable e) {