From f16daa59a47b8fd49c8cca4aee1b66965c5ac67b Mon Sep 17 00:00:00 2001 From: Wangda Tan Date: Mon, 21 Dec 2015 11:30:13 -0800 Subject: [PATCH] YARN-4454. NM to nodelabel mapping going wrong after RM restart. (Bibin A Chundatt via wangda) (cherry picked from commit bc038b382cb2ce561ce718405fbcee4382f3b204) --- hadoop-yarn-project/CHANGES.txt | 3 + .../nodelabels/CommonNodeLabelsManager.java | 3 +- .../server/resourcemanager/TestRMRestart.java | 73 ++++++++++++++++++- 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5f6e14199cb..c1ab2944f94 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1113,6 +1113,9 @@ Release 2.8.0 - UNRELEASED YARN-4461. Redundant nodeLocalityDelay log in LeafQueue (Eric Payne via jlowe) + YARN-4454. NM to nodelabel mapping going wrong after RM restart. + (Bibin A Chundatt via wangda) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java index 8b26cc5df16..172a737f99f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/nodelabels/CommonNodeLabelsManager.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -1074,7 +1075,7 @@ public class CommonNodeLabelsManager extends AbstractService { protected Map> normalizeNodeIdToLabels( Map> nodeIdToLabels) { - Map> newMap = new HashMap>(); + Map> newMap = new TreeMap>(); for (Entry> entry : nodeIdToLabels.entrySet()) { NodeId id = entry.getKey(); Set labels = entry.getValue(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index d2b8eeec710..bad68f4f9e8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -2141,11 +2141,11 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { // Add node Label to Node h1->x NodeId n1 = NodeId.newInstance("h1", 0); nodeLabelManager.addLabelsToNode(ImmutableMap.of(n1, toSet("x"))); - + clusterNodeLabels.remove("z"); // Remove cluster label z nodeLabelManager.removeFromClusterNodeLabels(toSet("z")); - + // Replace nodelabel h1->x,y nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("y"))); @@ -2179,8 +2179,8 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { rm2.start(); nodeLabelManager = rm2.getRMContext().getNodeLabelManager(); - Assert.assertEquals(clusterNodeLabels.size(), nodeLabelManager - .getClusterNodeLabelNames().size()); + Assert.assertEquals(clusterNodeLabels.size(), + nodeLabelManager.getClusterNodeLabelNames().size()); nodeLabels = nodeLabelManager.getNodeLabels(); Assert.assertEquals(1, nodeLabelManager.getNodeLabels().size()); @@ -2256,4 +2256,69 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { return set; } + @Test(timeout = 20000) + public void testRMRestartNodeMapping() throws Exception { + // Initial FS node label store root dir to a random tmp dir + File nodeLabelFsStoreDir = new File("target", + this.getClass().getSimpleName() + "-testRMRestartNodeMapping"); + if (nodeLabelFsStoreDir.exists()) { + FileUtils.deleteDirectory(nodeLabelFsStoreDir); + } + nodeLabelFsStoreDir.deleteOnExit(); + String nodeLabelFsStoreDirURI = nodeLabelFsStoreDir.toURI().toString(); + conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, + nodeLabelFsStoreDirURI); + + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + MockRM rm1 = new MockRM(conf, memStore) { + @Override + protected RMNodeLabelsManager createNodeLabelManager() { + RMNodeLabelsManager mgr = new RMNodeLabelsManager(); + mgr.init(getConfig()); + return mgr; + } + }; + rm1.init(conf); + rm1.start(); + RMNodeLabelsManager nodeLabelManager = + rm1.getRMContext().getNodeLabelManager(); + + Set clusterNodeLabels = new HashSet(); + clusterNodeLabels.add("x"); + clusterNodeLabels.add("y"); + nodeLabelManager + .addToCluserNodeLabelsWithDefaultExclusivity(clusterNodeLabels); + // Add node Label to Node h1->x + NodeId n1 = NodeId.newInstance("h1", 1234); + NodeId n2 = NodeId.newInstance("h1", 1235); + NodeId nihost = NodeId.newInstance("h1", 0); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("x"))); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n2, toSet("x"))); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(nihost, toSet("y"))); + nodeLabelManager.replaceLabelsOnNode(ImmutableMap.of(n1, toSet("x"))); + MockRM rm2 = null; + for (int i = 0; i < 2; i++) { + rm2 = new MockRM(conf, memStore) { + @Override + protected RMNodeLabelsManager createNodeLabelManager() { + RMNodeLabelsManager mgr = new RMNodeLabelsManager(); + mgr.init(getConfig()); + return mgr; + } + }; + rm2.init(conf); + rm2.start(); + + nodeLabelManager = rm2.getRMContext().getNodeLabelManager(); + Map> labelsToNodes = + nodeLabelManager.getLabelsToNodes(toSet("x")); + Assert.assertEquals(1, + null == labelsToNodes.get("x") ? 0 : labelsToNodes.get("x").size()); + } + rm1.stop(); + rm2.stop(); + } + }