diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 4c59da63d45..f027c29300c 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -713,6 +713,9 @@ Release 2.8.0 - UNRELEASED YARN-3963. AddNodeLabel on duplicate label addition shows success. (Bibin A Chundatt via wangda) + YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue + on nodelabel recovery. (Bibin A Chundatt via wangda) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java index 10092ca9a63..8587bdaef4b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/RMNodeLabelsManager.java @@ -33,8 +33,8 @@ import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.service.Service; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.nodelabels.RMNodeLabel; @@ -114,9 +114,15 @@ public class RMNodeLabelsManager extends CommonNodeLabelsManager { throws IOException { try { writeLock.lock(); - - checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove); - + if 
(getServiceState() == Service.STATE.STARTED) { + // We cannot remove node labels from the collection while any queue is + // still using them. + // This check is only performed once the service has finished starting. + // During startup we replay edit logs to recover state, and a historical + // operation may have removed labels that were used by queues in the past + // but are not used by any current queue. + checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove); + } // copy before NMs Map before = cloneNodeMap(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java index 05bb1e57e14..79408655e56 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/nodelabels/TestRMNodeLabelsManager.java @@ -18,7 +18,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.nodelabels; +import java.io.File; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -31,6 +34,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.nodelabels.RMNodeLabel; import org.apache.hadoop.yarn.nodelabels.NodeLabelTestBase; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.util.resource.Resources; import
org.junit.After; import org.junit.Assert; @@ -46,7 +50,8 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { private final Resource LARGE_NODE = Resource.newInstance(1000, 0); NullRMNodeLabelsManager mgr = null; - + RMNodeLabelsManager lmgr = null; + boolean checkQueueCall = false; @Before public void before() { mgr = new NullRMNodeLabelsManager(); @@ -506,7 +511,46 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase { checkNodeLabelInfo(infos, "y", 1, 10); checkNodeLabelInfo(infos, "z", 0, 0); } - + + @Test(timeout = 60000) + public void testcheckRemoveFromClusterNodeLabelsOfQueue() throws Exception { + class TestRMLabelManger extends RMNodeLabelsManager { + @Override + protected void checkRemoveFromClusterNodeLabelsOfQueue( + Collection labelsToRemove) throws IOException { + checkQueueCall = true; + // Only record the call; skip the real queue check + } + + } + lmgr = new TestRMLabelManger(); + Configuration conf = new Configuration(); + File tempDir = File.createTempFile("nlb", ".tmp"); + tempDir.delete(); + tempDir.mkdirs(); + tempDir.deleteOnExit(); + conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR, + tempDir.getAbsolutePath()); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + MockRM rm = new MockRM(conf) { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return lmgr; + } + }; + lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a")); + lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "a" })); + rm.getRMContext().setNodeLabelManager(lmgr); + rm.start(); + lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a")); + Assert.assertEquals(false, checkQueueCall); + lmgr.removeFromClusterNodeLabels(Arrays.asList(new String[] { "a" })); + Assert.assertEquals(true, checkQueueCall); + lmgr.stop(); + lmgr.close(); + rm.stop(); + } + @Test(timeout = 5000) public void testLabelsToNodesOnNodeActiveDeactive() throws Exception { // Activate a node without assigning any labels