YARN-4855. Should check if node exists when replace nodelabels. Contributeed by Tao Jie

This commit is contained in:
Naganarasimha 2016-10-03 02:02:26 -04:00
parent 82857037b6
commit 6e130c308c
7 changed files with 197 additions and 18 deletions

View File

@ -44,4 +44,12 @@ public static ReplaceLabelsOnNodeRequest newInstance(
@Public @Public
@Evolving @Evolving
public abstract Map<NodeId, Set<String>> getNodeToLabels(); public abstract Map<NodeId, Set<String>> getNodeToLabels();
@Public
@Evolving
public abstract void setFailOnUnknownNodes(boolean failOnUnknownNodes);
@Public
@Evolving
public abstract boolean getFailOnUnknownNodes();
} }

View File

@ -99,10 +99,10 @@ message RemoveFromClusterNodeLabelsResponseProto {
message ReplaceLabelsOnNodeRequestProto { message ReplaceLabelsOnNodeRequestProto {
repeated NodeIdToLabelsNameProto nodeToLabels = 1; repeated NodeIdToLabelsNameProto nodeToLabels = 1;
optional bool failOnUnknownNodes = 2;
} }
message ReplaceLabelsOnNodeResponseProto { message ReplaceLabelsOnNodeResponseProto {
} }
message UpdateNodeLabelsResponseProto { message UpdateNodeLabelsResponseProto {

View File

@ -130,11 +130,13 @@ public class RMAdminCLI extends HAAdmin {
new UsageInfo("<label1,label2,label3> (label splitted by \",\")", new UsageInfo("<label1,label2,label3> (label splitted by \",\")",
"remove from cluster node labels")) "remove from cluster node labels"))
.put("-replaceLabelsOnNode", .put("-replaceLabelsOnNode",
new UsageInfo( new UsageInfo("[-failOnUnknownNodes] " +
"<\"node1[:port]=label1,label2 node2[:port]=label1,label2\">", "<\"node1[:port]=label1,label2 node2[:port]=label1,label2\">",
"replace labels on nodes" "replace labels on nodes"
+ " (please note that we do not support specifying multiple" + " (please note that we do not support specifying multiple"
+ " labels on a single host for now.)")) + " labels on a single host for now.)\n\t\t"
+ "[-failOnUnknownNodes] is optional, when we set this"
+ " option, it will fail if specified nodes are unknown."))
.put("-directlyAccessNodeLabelStore", .put("-directlyAccessNodeLabelStore",
new UsageInfo("", "This is DEPRECATED, will be removed in future releases. Directly access node label store, " new UsageInfo("", "This is DEPRECATED, will be removed in future releases. Directly access node label store, "
+ "with this option, all node label related operations" + "with this option, all node label related operations"
@ -246,8 +248,8 @@ private static void printHelp(String cmd, boolean isHAEnabled) {
" [-addToClusterNodeLabels <\"label1(exclusive=true)," " [-addToClusterNodeLabels <\"label1(exclusive=true),"
+ "label2(exclusive=false),label3\">]" + + "label2(exclusive=false),label3\">]" +
" [-removeFromClusterNodeLabels <label1,label2,label3>]" + " [-removeFromClusterNodeLabels <label1,label2,label3>]" +
" [-replaceLabelsOnNode <\"node1[:port]=label1,label2" + " [-replaceLabelsOnNode [-failOnUnknownNodes] "
" node2[:port]=label1\">]" + + "<\"node1[:port]=label1,label2 node2[:port]=label1\">]" +
" [-directlyAccessNodeLabelStore]" + " [-directlyAccessNodeLabelStore]" +
" [-refreshClusterMaxPriority]" + " [-refreshClusterMaxPriority]" +
" [-updateNodeResource [NodeID] [MemSize] [vCores]" + " [-updateNodeResource [NodeID] [MemSize] [vCores]" +
@ -657,14 +659,14 @@ private Map<NodeId, Set<String>> buildNodeLabelsMapFromStr(String args) {
return map; return map;
} }
private int replaceLabelsOnNodes(String args) throws IOException, private int replaceLabelsOnNodes(String args, boolean failOnUnknownNodes)
YarnException { throws IOException, YarnException {
Map<NodeId, Set<String>> map = buildNodeLabelsMapFromStr(args); Map<NodeId, Set<String>> map = buildNodeLabelsMapFromStr(args);
return replaceLabelsOnNodes(map); return replaceLabelsOnNodes(map, failOnUnknownNodes);
} }
private int replaceLabelsOnNodes(Map<NodeId, Set<String>> map) private int replaceLabelsOnNodes(Map<NodeId, Set<String>> map,
throws IOException, YarnException { boolean failOnUnknownNodes) throws IOException, YarnException {
if (directlyAccessNodeLabelStore) { if (directlyAccessNodeLabelStore) {
getNodeLabelManagerInstance(getConf()).replaceLabelsOnNode(map); getNodeLabelManagerInstance(getConf()).replaceLabelsOnNode(map);
} else { } else {
@ -672,6 +674,7 @@ private int replaceLabelsOnNodes(Map<NodeId, Set<String>> map)
createAdminProtocol(); createAdminProtocol();
ReplaceLabelsOnNodeRequest request = ReplaceLabelsOnNodeRequest request =
ReplaceLabelsOnNodeRequest.newInstance(map); ReplaceLabelsOnNodeRequest.newInstance(map);
request.setFailOnUnknownNodes(failOnUnknownNodes);
adminProtocol.replaceLabelsOnNode(request); adminProtocol.replaceLabelsOnNode(request);
} }
return 0; return 0;
@ -783,8 +786,16 @@ public int run(String[] args) throws Exception {
System.err.println(NO_MAPPING_ERR_MSG); System.err.println(NO_MAPPING_ERR_MSG);
printUsage("", isHAEnabled); printUsage("", isHAEnabled);
exitCode = -1; exitCode = -1;
} else if ("-failOnUnknownNodes".equals(args[i])) {
if (i + 1 >= args.length) {
System.err.println(NO_MAPPING_ERR_MSG);
printUsage("", isHAEnabled);
exitCode = -1;
} else { } else {
exitCode = replaceLabelsOnNodes(args[i]); exitCode = replaceLabelsOnNodes(args[i + 1], true);
}
} else {
exitCode = replaceLabelsOnNodes(args[i], false);
} }
} else { } else {
exitCode = -1; exitCode = -1;

View File

@ -469,7 +469,7 @@ public void testHelp() throws Exception {
"[username]] [-addToClusterNodeLabels " + "[username]] [-addToClusterNodeLabels " +
"<\"label1(exclusive=true),label2(exclusive=false),label3\">] " + "<\"label1(exclusive=true),label2(exclusive=false),label3\">] " +
"[-removeFromClusterNodeLabels <label1,label2,label3>] " + "[-removeFromClusterNodeLabels <label1,label2,label3>] " +
"[-replaceLabelsOnNode " + "[-replaceLabelsOnNode [-failOnUnknownNodes] " +
"<\"node1[:port]=label1,label2 node2[:port]=label1\">] " + "<\"node1[:port]=label1,label2 node2[:port]=label1\">] " +
"[-directlyAccessNodeLabelStore] [-refreshClusterMaxPriority] " + "[-directlyAccessNodeLabelStore] [-refreshClusterMaxPriority] " +
"[-updateNodeResource [NodeID] [MemSize] [vCores] " + "[-updateNodeResource [NodeID] [MemSize] [vCores] " +
@ -564,6 +564,7 @@ public void testHelp() throws Exception {
+ " [username]] [-addToClusterNodeLabels <\"label1(exclusive=true)," + " [username]] [-addToClusterNodeLabels <\"label1(exclusive=true),"
+ "label2(exclusive=false),label3\">]" + "label2(exclusive=false),label3\">]"
+ " [-removeFromClusterNodeLabels <label1,label2,label3>] [-replaceLabelsOnNode " + " [-removeFromClusterNodeLabels <label1,label2,label3>] [-replaceLabelsOnNode "
+ "[-failOnUnknownNodes] "
+ "<\"node1[:port]=label1,label2 node2[:port]=label1\">] [-directlyAccessNodeLabelStore] " + "<\"node1[:port]=label1,label2 node2[:port]=label1\">] [-directlyAccessNodeLabelStore] "
+ "[-refreshClusterMaxPriority] " + "[-refreshClusterMaxPriority] "
+ "[-updateNodeResource [NodeID] [MemSize] [vCores] " + "[-updateNodeResource [NodeID] [MemSize] [vCores] "

View File

@ -146,6 +146,18 @@ public void setNodeToLabels(Map<NodeId, Set<String>> map) {
nodeIdToLabels.putAll(map); nodeIdToLabels.putAll(map);
} }
@Override
public boolean getFailOnUnknownNodes() {
ReplaceLabelsOnNodeRequestProtoOrBuilder p = viaProto ? proto : builder;
return p.getFailOnUnknownNodes();
}
@Override
public void setFailOnUnknownNodes(boolean failOnUnknownNodes) {
maybeInitBuilder();
builder.setFailOnUnknownNodes(failOnUnknownNodes);
}
private NodeIdProto convertToProtoFormat(NodeId t) { private NodeIdProto convertToProtoFormat(NodeId t) {
return ((NodeIdPBImpl) t).getProto(); return ((NodeIdPBImpl) t).getProto();
} }

View File

@ -21,6 +21,9 @@
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set; import java.util.Set;
@ -806,6 +809,49 @@ public ReplaceLabelsOnNodeResponse replaceLabelsOnNode(
ReplaceLabelsOnNodeResponse response = ReplaceLabelsOnNodeResponse response =
recordFactory.newRecordInstance(ReplaceLabelsOnNodeResponse.class); recordFactory.newRecordInstance(ReplaceLabelsOnNodeResponse.class);
if (request.getFailOnUnknownNodes()) {
// verify if nodes have registered to RM
List<NodeId> unknownNodes = new ArrayList<>();
for (NodeId requestedNode : request.getNodeToLabels().keySet()) {
boolean isKnown = false;
// both active and inactive nodes are recognized as known nodes
if (requestedNode.getPort() != 0) {
if (rmContext.getRMNodes().containsKey(requestedNode)
|| rmContext.getInactiveRMNodes().containsKey(requestedNode)) {
isKnown = true;
}
} else {
for (NodeId knownNode : rmContext.getRMNodes().keySet()) {
if (knownNode.getHost().equals(requestedNode.getHost())) {
isKnown = true;
break;
}
}
if (!isKnown) {
for (NodeId knownNode : rmContext.getInactiveRMNodes().keySet()) {
if (knownNode.getHost().equals(requestedNode.getHost())) {
isKnown = true;
break;
}
}
}
}
if (!isKnown) {
unknownNodes.add(requestedNode);
}
}
if (!unknownNodes.isEmpty()) {
RMAuditLogger.logFailure(user.getShortUserName(), operation, "",
"AdminService",
"Failed to replace labels as there are unknown nodes:"
+ Arrays.toString(unknownNodes.toArray()));
throw RPCUtil.getRemoteException(new IOException(
"Failed to replace labels as there are unknown nodes:"
+ Arrays.toString(unknownNodes.toArray())));
}
}
try { try {
rmContext.getNodeLabelManager().replaceLabelsOnNode( rmContext.getNodeLabelManager().replaceLabelsOnNode(
request.getNodeToLabels()); request.getNodeToLabels());

View File

@ -28,6 +28,7 @@
import java.io.PrintWriter; import java.io.PrintWriter;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -64,9 +65,9 @@
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.resource.DynamicResourceConfiguration;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.After; import org.junit.After;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
@ -1085,6 +1086,106 @@ public void testModifyLabelsOnNodesWithCentralizedConfigurationDisabled()
rm.close(); rm.close();
} }
@Test
public void testModifyLabelsOnUnknownNodes() throws IOException,
YarnException {
// create RM and set it's ACTIVE, and set distributed node label
// configuration to true
rm = new MockRM();
((RMContextImpl) rm.getRMContext())
.setHAServiceState(HAServiceState.ACTIVE);
Map<NodeId, RMNode> rmNodes = rm.getRMContext().getRMNodes();
rmNodes.put(NodeId.newInstance("host1", 1111),
new RMNodeImpl(null, rm.getRMContext(), "host1", 0, 0, null, null,
null));
rmNodes.put(NodeId.newInstance("host2", 2222),
new RMNodeImpl(null, rm.getRMContext(), "host2", 0, 0, null, null,
null));
rmNodes.put(NodeId.newInstance("host3", 3333),
new RMNodeImpl(null, rm.getRMContext(), "host3", 0, 0, null, null,
null));
Map<NodeId, RMNode> rmInactiveNodes = rm.getRMContext()
.getInactiveRMNodes();
rmInactiveNodes.put(NodeId.newInstance("host4", 4444),
new RMNodeImpl(null, rm.getRMContext(), "host4", 0, 0, null, null,
null));
RMNodeLabelsManager labelMgr = rm.rmContext.getNodeLabelManager();
// by default, distributed configuration for node label is disabled, this
// should pass
labelMgr.addToCluserNodeLabelsWithDefaultExclusivity(ImmutableSet.of("x",
"y"));
// replace known node
ReplaceLabelsOnNodeRequest request1 = ReplaceLabelsOnNodeRequest
.newInstance(ImmutableMap.of(NodeId.newInstance("host1", 1111),
(Set<String>) ImmutableSet.of("x")));
request1.setFailOnUnknownNodes(true);
try {
rm.adminService.replaceLabelsOnNode(request1);
} catch (Exception ex) {
fail("should not fail on known node");
}
// replace known node with wildcard port
ReplaceLabelsOnNodeRequest request2 = ReplaceLabelsOnNodeRequest
.newInstance(ImmutableMap.of(NodeId.newInstance("host1", 0),
(Set<String>) ImmutableSet.of("x")));
request2.setFailOnUnknownNodes(true);
try {
rm.adminService.replaceLabelsOnNode(request2);
} catch (Exception ex) {
fail("should not fail on known node");
}
// replace unknown node
ReplaceLabelsOnNodeRequest request3 = ReplaceLabelsOnNodeRequest
.newInstance(ImmutableMap.of(NodeId.newInstance("host5", 0),
(Set<String>) ImmutableSet.of("x")));
request3.setFailOnUnknownNodes(true);
try {
rm.adminService.replaceLabelsOnNode(request3);
fail("Should fail on unknown node");
} catch (Exception ex) {
}
// replace known node but wrong port
ReplaceLabelsOnNodeRequest request4 = ReplaceLabelsOnNodeRequest
.newInstance(ImmutableMap.of(NodeId.newInstance("host2", 1111),
(Set<String>) ImmutableSet.of("x")));
request4.setFailOnUnknownNodes(true);
try {
rm.adminService.replaceLabelsOnNode(request4);
fail("Should fail on node with wrong port");
} catch (Exception ex) {
}
// replace non-exist node but not check
ReplaceLabelsOnNodeRequest request5 = ReplaceLabelsOnNodeRequest
.newInstance(ImmutableMap.of(NodeId.newInstance("host5", 0),
(Set<String>) ImmutableSet.of("x")));
request5.setFailOnUnknownNodes(false);
try {
rm.adminService.replaceLabelsOnNode(request5);
} catch (Exception ex) {
fail("Should not fail on unknown node when "
+ "fail-on-unkown-nodes is set false");
}
// replace on inactive node
ReplaceLabelsOnNodeRequest request6 = ReplaceLabelsOnNodeRequest
.newInstance(ImmutableMap.of(NodeId.newInstance("host4", 0),
(Set<String>) ImmutableSet.of("x")));
request6.setFailOnUnknownNodes(true);
try {
rm.adminService.replaceLabelsOnNode(request6);
} catch (Exception ex) {
fail("should not fail on inactive node");
}
rm.close();
}
@Test @Test
public void testRemoveClusterNodeLabelsWithCentralizedConfigurationDisabled() public void testRemoveClusterNodeLabelsWithCentralizedConfigurationDisabled()
throws IOException, YarnException { throws IOException, YarnException {