YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue on nodelabel recovery. (Bibin A Chundatt via wangda)

This commit is contained in:
Wangda Tan 2015-07-30 10:00:31 -07:00
parent 8acb30b016
commit 91b42e7d6e
3 changed files with 59 additions and 6 deletions

View File

@ -713,6 +713,9 @@ Release 2.8.0 - UNRELEASED
YARN-3963. AddNodeLabel on duplicate label addition shows success.
(Bibin A Chundatt via wangda)
YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue
on nodelabel recovery. (Bibin A Chundatt via wangda)
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -33,8 +33,8 @@ import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
@ -114,9 +114,15 @@ public class RMNodeLabelsManager extends CommonNodeLabelsManager {
throws IOException {
try {
writeLock.lock();
checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove);
if (getServiceState() == Service.STATE.STARTED) {
// We cannot remove node labels from the collection while some queue(s)
// are still using any of them.
// We only perform this check once the service has finished starting.
// Before that, we replay edit logs to recover state, and a historical
// operation may have removed labels that were used by some queues in the
// past but are not used by any current queue.
checkRemoveFromClusterNodeLabelsOfQueue(labelsToRemove);
}
// copy before NMs
Map<String, Host> before = cloneNodeMap();

View File

@ -18,7 +18,10 @@
package org.apache.hadoop.yarn.server.resourcemanager.nodelabels;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -31,6 +34,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
import org.apache.hadoop.yarn.nodelabels.NodeLabelTestBase;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.junit.After;
import org.junit.Assert;
@ -46,7 +50,8 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase {
private final Resource LARGE_NODE = Resource.newInstance(1000, 0);
NullRMNodeLabelsManager mgr = null;
RMNodeLabelsManager lmgr = null;
boolean checkQueueCall = false;
@Before
public void before() {
mgr = new NullRMNodeLabelsManager();
@ -507,6 +512,45 @@ public class TestRMNodeLabelsManager extends NodeLabelTestBase {
checkNodeLabelInfo(infos, "z", 0, 0);
}
/**
 * Verifies when {@code checkRemoveFromClusterNodeLabelsOfQueue} is invoked
 * by {@code removeFromClusterNodeLabels}: it must not be invoked for a
 * removal issued before the RM is started (nor by anything that happens
 * during {@code rm.start()}), and it must be invoked for a removal issued
 * once the RM is running.
 *
 * The queue check is overridden with a stub that only records the call in
 * {@code checkQueueCall}, so the test observes whether the check was reached
 * without depending on any real queue configuration.
 */
@Test(timeout = 60000)
public void testcheckRemoveFromClusterNodeLabelsOfQueue() throws Exception {
  // Label manager whose queue check is replaced by a call recorder.
  class TestRMLabelManager extends RMNodeLabelsManager {
    @Override
    protected void checkRemoveFromClusterNodeLabelsOfQueue(
        Collection<String> labelsToRemove) throws IOException {
      checkQueueCall = true;
      // Do nothing
    }
  }

  // Make the starting state of the flag explicit for this test.
  checkQueueCall = false;
  lmgr = new TestRMLabelManager();

  // Point the node-label store at a fresh, empty directory.
  Configuration conf = new Configuration();
  File tempDir = File.createTempFile("nlb", ".tmp");
  tempDir.delete();
  tempDir.mkdirs();
  tempDir.deleteOnExit();
  conf.set(YarnConfiguration.FS_NODE_LABELS_STORE_ROOT_DIR,
      tempDir.getAbsolutePath());
  conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);

  MockRM rm = new MockRM(conf) {
    @Override
    public RMNodeLabelsManager createNodeLabelManager() {
      return lmgr;
    }
  };

  try {
    // Add and remove a label before the RM is started.
    lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a"));
    lmgr.removeFromClusterNodeLabels(Arrays.asList("a"));
    rm.getRMContext().setNodeLabelManager(lmgr);
    rm.start();

    lmgr.addToCluserNodeLabelsWithDefaultExclusivity(toSet("a"));
    // Nothing so far (pre-start removal, RM start, label addition) may have
    // reached the queue check.
    Assert.assertFalse(
        "Queue check must be skipped before the service is started",
        checkQueueCall);

    // With the RM running, removing a label must go through the queue check.
    lmgr.removeFromClusterNodeLabels(Arrays.asList("a"));
    Assert.assertTrue(
        "Queue check must run once the service is started",
        checkQueueCall);
  } finally {
    // Always release the services, even when an assertion above fails, so a
    // failure here cannot leak a running RM into subsequent tests.
    try {
      lmgr.stop();
      lmgr.close();
    } finally {
      rm.stop();
    }
  }
}
@Test(timeout = 5000)
public void testLabelsToNodesOnNodeActiveDeactive() throws Exception {
// Activate a node without assigning any labels