From 8269bfa613999f71767de3c0369817b58cfe1416 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Thu, 25 Sep 2014 22:37:05 +0000 Subject: [PATCH] YARN-2523. ResourceManager UI showing negative value for "Decommissioned Nodes" field. Contributed by Rohith --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../resourcemanager/NodesListManager.java | 1 - .../resourcemanager/rmnode/RMNodeImpl.java | 13 ---------- .../server/resourcemanager/TestRMRestart.java | 26 ++++++++++++++++--- .../TestResourceTrackerService.java | 25 ++++++++++++++++-- 5 files changed, 48 insertions(+), 20 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index aaac7b71386..bbda48d15ac 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -447,6 +447,9 @@ Release 2.6.0 - UNRELEASED YARN-2546. Made REST API for application creation/submission use numeric and boolean types instead of the string of them. (Varun Vasudev via zjshen) + YARN-2523. ResourceManager UI showing negative value for "Decommissioned + Nodes" field (Rohith via jlowe) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java index d7797cc5ba7..90d7b51bc62 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/NodesListManager.java @@ -121,7 +121,6 @@ public class NodesListManager extends AbstractService implements this.conf, includesFile), 
excludesFile.isEmpty() ? null : this.rmContext.getConfigurationProvider() .getConfigurationInputStream(this.conf, excludesFile)); - setDecomissionedNMsMetrics(); printConfiguredHosts(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index f0ae826ee5e..1123a9818f5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -460,22 +460,9 @@ public class RMNodeImpl implements RMNode, EventHandler { break; } - // Decomissioned NMs equals to the nodes missing in include list (if - // include list not empty) or the nodes listed in excluded list. - // DecomissionedNMs as per exclude list is set upfront when the - // exclude list is read so that RM restart can also reflect the - // decomissionedNMs. Note that RM is still not able to know decomissionedNMs - // as per include list after it restarts as they are known when those nodes - // come for registration. - // DecomissionedNMs as per include list is incremented in this transition. 
switch (finalState) { case DECOMMISSIONED: - Set ecludedHosts = - context.getNodesListManager().getHostsReader().getExcludedHosts(); - if (!ecludedHosts.contains(hostName) - && !ecludedHosts.contains(NetUtils.normalizeHostName(hostName))) { metrics.incrDecommisionedNMs(); - } break; case LOST: metrics.incrNumLostNMs(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index caa5647c738..0b3a364c455 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -77,6 +77,8 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; @@ -1833,10 +1835,16 @@ public class TestRMRestart { conf.set(YarnConfiguration.RM_NODES_EXCLUDE_FILE_PATH, hostFile.getAbsolutePath()); writeToHostsFile(""); - MockRM rm1 = new MockRM(conf); + final DrainDispatcher dispatcher = new DrainDispatcher(); + MockRM rm1 = new MockRM(conf) { + @Override + protected Dispatcher createDispatcher() { + return dispatcher; + } + }; rm1.start(); - 
rm1.registerNode("localhost:1234", 8000); - rm1.registerNode("host2:1234", 8000); + MockNM nm1 = rm1.registerNode("localhost:1234", 8000); + MockNM nm2 = rm1.registerNode("host2:1234", 8000); Assert .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); String ip = NetUtils.normalizeHostName("localhost"); @@ -1845,15 +1853,25 @@ public class TestRMRestart { // refresh nodes rm1.getNodesListManager().refreshNodes(conf); + NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); + Assert + .assertTrue(NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); + nodeHeartbeat = nm2.nodeHeartbeat(true); + Assert.assertTrue("The decommisioned metrics are not updated", + NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); + + dispatcher.await(); Assert .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); + rm1.stop(); + Assert + .assertEquals(0, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); // restart RM. MockRM rm2 = new MockRM(conf); rm2.start(); Assert .assertEquals(2, ClusterMetrics.getMetrics().getNumDecommisionedNMs()); - rm1.stop(); rm2.stop(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java index 877a12215ed..077f4647c15 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceTrackerService.java @@ -157,25 +157,33 @@ public class TestResourceTrackerService { .getAbsolutePath()); 
writeToHostsFile(""); - rm = new MockRM(conf); + final DrainDispatcher dispatcher = new DrainDispatcher(); + rm = new MockRM(conf) { + @Override + protected Dispatcher createDispatcher() { + return dispatcher; + } + }; rm.start(); MockNM nm1 = rm.registerNode("host1:1234", 5120); MockNM nm2 = rm.registerNode("host2:5678", 10240); MockNM nm3 = rm.registerNode("localhost:4433", 1024); + dispatcher.await(); + int metricCount = ClusterMetrics.getMetrics().getNumDecommisionedNMs(); NodeHeartbeatResponse nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); nodeHeartbeat = nm2.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + dispatcher.await(); // To test that IPs also work String ip = NetUtils.normalizeHostName("localhost"); writeToHostsFile("host2", ip); rm.getNodesListManager().refreshNodes(conf); - checkDecommissionedNMCount(rm, metricCount + 2); nodeHeartbeat = nm1.nodeHeartbeat(true); Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); @@ -186,6 +194,19 @@ public class TestResourceTrackerService { nodeHeartbeat = nm3.nodeHeartbeat(true); Assert.assertTrue("The decommisioned metrics are not updated", NodeAction.SHUTDOWN.equals(nodeHeartbeat.getNodeAction())); + dispatcher.await(); + checkDecommissionedNMCount(rm, metricCount + 2); + writeToHostsFile(""); + rm.getNodesListManager().refreshNodes(conf); + + nm3 = rm.registerNode("localhost:4433", 1024); + dispatcher.await(); + nodeHeartbeat = nm3.nodeHeartbeat(true); + dispatcher.await(); + Assert.assertTrue(NodeAction.NORMAL.equals(nodeHeartbeat.getNodeAction())); + // decommissioned node is 1 since 1 node is rejoined after updating exclude + // file + checkDecommissionedNMCount(rm, metricCount + 1); } /**