YARN-8163. Add support for Node Labels in opportunistic scheduling. Contributed by Abhishek Modi.
This commit is contained in:
parent
4cdbdce752
commit
6a69239d86
|
@ -64,6 +64,26 @@ public abstract class RemoteNode implements Comparable<RemoteNode> {
|
||||||
return remoteNode;
|
return remoteNode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create new Instance.
|
||||||
|
* @param nodeId NodeId.
|
||||||
|
* @param httpAddress Http address.
|
||||||
|
* @param rackName Rack Name.
|
||||||
|
* @param nodePartition Node Partition.
|
||||||
|
* @return RemoteNode Instance.
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public static RemoteNode newInstance(NodeId nodeId, String httpAddress,
|
||||||
|
String rackName, String nodePartition) {
|
||||||
|
RemoteNode remoteNode = Records.newRecord(RemoteNode.class);
|
||||||
|
remoteNode.setNodeId(nodeId);
|
||||||
|
remoteNode.setHttpAddress(httpAddress);
|
||||||
|
remoteNode.setRackName(rackName);
|
||||||
|
remoteNode.setNodePartition(nodePartition);
|
||||||
|
return remoteNode;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get {@link NodeId}.
|
* Get {@link NodeId}.
|
||||||
* @return NodeId.
|
* @return NodeId.
|
||||||
|
@ -117,6 +137,23 @@ public abstract class RemoteNode implements Comparable<RemoteNode> {
|
||||||
* @param other RemoteNode.
|
* @param other RemoteNode.
|
||||||
* @return Comparison.
|
* @return Comparison.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get Node Partition.
|
||||||
|
* @return Node Partition.
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public abstract String getNodePartition();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set Node Partition.
|
||||||
|
* @param nodePartition
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public abstract void setNodePartition(String nodePartition);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(RemoteNode other) {
|
public int compareTo(RemoteNode other) {
|
||||||
return this.getNodeId().compareTo(other.getNodeId());
|
return this.getNodeId().compareTo(other.getNodeId());
|
||||||
|
@ -127,6 +164,7 @@ public abstract class RemoteNode implements Comparable<RemoteNode> {
|
||||||
return "RemoteNode{" +
|
return "RemoteNode{" +
|
||||||
"nodeId=" + getNodeId() + ", " +
|
"nodeId=" + getNodeId() + ", " +
|
||||||
"rackName=" + getRackName() + ", " +
|
"rackName=" + getRackName() + ", " +
|
||||||
"httpAddress=" + getHttpAddress() + "}";
|
"httpAddress=" + getHttpAddress() + ", " +
|
||||||
|
"partition=" + getNodePartition() + "}";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -136,6 +136,25 @@ public class RemoteNodePBImpl extends RemoteNode {
|
||||||
builder.setRackName(rackName);
|
builder.setRackName(rackName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getNodePartition() {
|
||||||
|
RemoteNodeProtoOrBuilder p = viaProto ? proto : builder;
|
||||||
|
if (!p.hasNodePartition()) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return (p.getNodePartition());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setNodePartition(String nodePartition) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
if (nodePartition == null) {
|
||||||
|
builder.clearNodePartition();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
builder.setNodePartition(nodePartition);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return getProto().hashCode();
|
return getProto().hashCode();
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.scheduler;
|
package org.apache.hadoop.yarn.server.scheduler;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.security.SecurityUtil;
|
import org.apache.hadoop.security.SecurityUtil;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
|
@ -461,10 +462,17 @@ public class OpportunisticContainerAllocator {
|
||||||
private Collection<RemoteNode> findNodeCandidates(int loopIndex,
|
private Collection<RemoteNode> findNodeCandidates(int loopIndex,
|
||||||
Map<String, RemoteNode> allNodes, Set<String> blackList,
|
Map<String, RemoteNode> allNodes, Set<String> blackList,
|
||||||
EnrichedResourceRequest enrichedRR) {
|
EnrichedResourceRequest enrichedRR) {
|
||||||
if (loopIndex > 1) {
|
|
||||||
return allNodes.values();
|
|
||||||
} else {
|
|
||||||
LinkedList<RemoteNode> retList = new LinkedList<>();
|
LinkedList<RemoteNode> retList = new LinkedList<>();
|
||||||
|
String partition = getRequestPartition(enrichedRR);
|
||||||
|
if (loopIndex > 1) {
|
||||||
|
for (RemoteNode remoteNode : allNodes.values()) {
|
||||||
|
if (StringUtils.equals(partition, getRemoteNodePartition(remoteNode))) {
|
||||||
|
retList.add(remoteNode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return retList;
|
||||||
|
} else {
|
||||||
|
|
||||||
int numContainers = enrichedRR.getRequest().getNumContainers();
|
int numContainers = enrichedRR.getRequest().getNumContainers();
|
||||||
while (numContainers > 0) {
|
while (numContainers > 0) {
|
||||||
if (loopIndex == 0) {
|
if (loopIndex == 0) {
|
||||||
|
@ -489,8 +497,10 @@ public class OpportunisticContainerAllocator {
|
||||||
private int collectRackLocalCandidates(Map<String, RemoteNode> allNodes,
|
private int collectRackLocalCandidates(Map<String, RemoteNode> allNodes,
|
||||||
EnrichedResourceRequest enrichedRR, LinkedList<RemoteNode> retList,
|
EnrichedResourceRequest enrichedRR, LinkedList<RemoteNode> retList,
|
||||||
Set<String> blackList, int numContainers) {
|
Set<String> blackList, int numContainers) {
|
||||||
|
String partition = getRequestPartition(enrichedRR);
|
||||||
for (RemoteNode rNode : allNodes.values()) {
|
for (RemoteNode rNode : allNodes.values()) {
|
||||||
if (enrichedRR.getRackLocations().contains(rNode.getRackName())) {
|
if (StringUtils.equals(partition, getRemoteNodePartition(rNode)) &&
|
||||||
|
enrichedRR.getRackLocations().contains(rNode.getRackName())) {
|
||||||
if (blackList.contains(rNode.getNodeId().getHost())) {
|
if (blackList.contains(rNode.getNodeId().getHost())) {
|
||||||
retList.addLast(rNode);
|
retList.addLast(rNode);
|
||||||
} else {
|
} else {
|
||||||
|
@ -508,9 +518,11 @@ public class OpportunisticContainerAllocator {
|
||||||
private int collectNodeLocalCandidates(Map<String, RemoteNode> allNodes,
|
private int collectNodeLocalCandidates(Map<String, RemoteNode> allNodes,
|
||||||
EnrichedResourceRequest enrichedRR, List<RemoteNode> retList,
|
EnrichedResourceRequest enrichedRR, List<RemoteNode> retList,
|
||||||
int numContainers) {
|
int numContainers) {
|
||||||
|
String partition = getRequestPartition(enrichedRR);
|
||||||
for (String nodeName : enrichedRR.getNodeLocations()) {
|
for (String nodeName : enrichedRR.getNodeLocations()) {
|
||||||
RemoteNode remoteNode = allNodes.get(nodeName);
|
RemoteNode remoteNode = allNodes.get(nodeName);
|
||||||
if (remoteNode != null) {
|
if (remoteNode != null &&
|
||||||
|
StringUtils.equals(partition, getRemoteNodePartition(remoteNode))) {
|
||||||
retList.add(remoteNode);
|
retList.add(remoteNode);
|
||||||
numContainers--;
|
numContainers--;
|
||||||
}
|
}
|
||||||
|
@ -563,7 +575,7 @@ public class OpportunisticContainerAllocator {
|
||||||
capability, currTime + tokenExpiry,
|
capability, currTime + tokenExpiry,
|
||||||
tokenSecretManager.getCurrentKey().getKeyId(), rmIdentifier,
|
tokenSecretManager.getCurrentKey().getKeyId(), rmIdentifier,
|
||||||
schedulerKey.getPriority(), currTime,
|
schedulerKey.getPriority(), currTime,
|
||||||
null, CommonNodeLabelsManager.NO_LABEL, ContainerType.TASK,
|
null, getRemoteNodePartition(node), ContainerType.TASK,
|
||||||
ExecutionType.OPPORTUNISTIC, schedulerKey.getAllocationRequestId());
|
ExecutionType.OPPORTUNISTIC, schedulerKey.getAllocationRequestId());
|
||||||
byte[] pwd =
|
byte[] pwd =
|
||||||
tokenSecretManager.createPassword(containerTokenIdentifier);
|
tokenSecretManager.createPassword(containerTokenIdentifier);
|
||||||
|
@ -616,4 +628,20 @@ public class OpportunisticContainerAllocator {
|
||||||
}
|
}
|
||||||
return partitionedRequests;
|
return partitionedRequests;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getRequestPartition(EnrichedResourceRequest enrichedRR) {
|
||||||
|
String partition = enrichedRR.getRequest().getNodeLabelExpression();
|
||||||
|
if (partition == null) {
|
||||||
|
partition = CommonNodeLabelsManager.NO_LABEL;
|
||||||
|
}
|
||||||
|
return partition;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getRemoteNodePartition(RemoteNode node) {
|
||||||
|
String partition = node.getNodePartition();
|
||||||
|
if (partition == null) {
|
||||||
|
partition = CommonNodeLabelsManager.NO_LABEL;
|
||||||
|
}
|
||||||
|
return partition;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,7 @@ message RemoteNodeProto {
|
||||||
optional NodeIdProto node_id = 1;
|
optional NodeIdProto node_id = 1;
|
||||||
optional string http_address = 2;
|
optional string http_address = 2;
|
||||||
optional string rack_name = 3;
|
optional string rack_name = 3;
|
||||||
|
optional string node_partition = 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
message RegisterDistributedSchedulingAMResponseProto {
|
message RegisterDistributedSchedulingAMResponseProto {
|
||||||
|
|
|
@ -596,4 +596,41 @@ public class TestOpportunisticContainerAllocator {
|
||||||
}
|
}
|
||||||
Assert.assertEquals(100, containers.size());
|
Assert.assertEquals(100, containers.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAllocationWithNodeLabels() throws Exception {
|
||||||
|
ResourceBlacklistRequest blacklistRequest =
|
||||||
|
ResourceBlacklistRequest.newInstance(
|
||||||
|
new ArrayList<>(), new ArrayList<>());
|
||||||
|
List<ResourceRequest> reqs =
|
||||||
|
Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1),
|
||||||
|
"*", Resources.createResource(1 * GB), 1, true, "label",
|
||||||
|
ExecutionTypeRequest.newInstance(
|
||||||
|
ExecutionType.OPPORTUNISTIC, true)));
|
||||||
|
ApplicationAttemptId appAttId = ApplicationAttemptId.newInstance(
|
||||||
|
ApplicationId.newInstance(0L, 1), 1);
|
||||||
|
|
||||||
|
oppCntxt.updateNodeList(
|
||||||
|
Arrays.asList(
|
||||||
|
RemoteNode.newInstance(
|
||||||
|
NodeId.newInstance("h1", 1234), "h1:1234", "/r1")));
|
||||||
|
List<Container> containers = allocator.allocateContainers(
|
||||||
|
blacklistRequest, reqs, appAttId, oppCntxt, 1L, "luser");
|
||||||
|
/* Since there is no node satisfying node label constraints, requests
|
||||||
|
won't get fulfilled.
|
||||||
|
*/
|
||||||
|
Assert.assertEquals(0, containers.size());
|
||||||
|
Assert.assertEquals(1, oppCntxt.getOutstandingOpReqs().size());
|
||||||
|
|
||||||
|
oppCntxt.updateNodeList(
|
||||||
|
Arrays.asList(
|
||||||
|
RemoteNode.newInstance(
|
||||||
|
NodeId.newInstance("h1", 1234), "h1:1234", "/r1",
|
||||||
|
"label")));
|
||||||
|
|
||||||
|
containers = allocator.allocateContainers(
|
||||||
|
blacklistRequest, reqs, appAttId, oppCntxt, 1L, "luser");
|
||||||
|
Assert.assertEquals(1, containers.size());
|
||||||
|
Assert.assertEquals(0, oppCntxt.getOutstandingOpReqs().size());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRespons
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.Container;
|
import org.apache.hadoop.yarn.api.records.Container;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
|
||||||
import org.apache.hadoop.yarn.server.api.DistributedSchedulingAMProtocol;
|
import org.apache.hadoop.yarn.server.api.DistributedSchedulingAMProtocol;
|
||||||
|
@ -174,6 +175,16 @@ public class OpportunisticContainerAllocatorAMService
|
||||||
appAttempt.getOpportunisticContainerContext();
|
appAttempt.getOpportunisticContainerContext();
|
||||||
oppCtx.updateNodeList(getLeastLoadedNodes());
|
oppCtx.updateNodeList(getLeastLoadedNodes());
|
||||||
|
|
||||||
|
if (!partitionedAsks.getOpportunistic().isEmpty()) {
|
||||||
|
String appPartition = appAttempt.getAppAMNodePartitionName();
|
||||||
|
|
||||||
|
for (ResourceRequest req : partitionedAsks.getOpportunistic()) {
|
||||||
|
if (null == req.getNodeLabelExpression()) {
|
||||||
|
req.setNodeLabelExpression(appPartition);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
List<Container> oppContainers =
|
List<Container> oppContainers =
|
||||||
oppContainerAllocator.allocateContainers(
|
oppContainerAllocator.allocateContainers(
|
||||||
request.getResourceBlacklistRequest(),
|
request.getResourceBlacklistRequest(),
|
||||||
|
@ -436,6 +447,7 @@ public class OpportunisticContainerAllocatorAMService
|
||||||
if (node != null) {
|
if (node != null) {
|
||||||
RemoteNode rNode = RemoteNode.newInstance(nodeId, node.getHttpAddress());
|
RemoteNode rNode = RemoteNode.newInstance(nodeId, node.getHttpAddress());
|
||||||
rNode.setRackName(node.getRackName());
|
rNode.setRackName(node.getRackName());
|
||||||
|
rNode.setNodePartition(node.getPartition());
|
||||||
return rNode;
|
return rNode;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -927,6 +927,8 @@ public class TestOpportunisticContainerAllocatorAMService {
|
||||||
distAllReq.getProto()));
|
distAllReq.getProto()));
|
||||||
Assert.assertEquals(
|
Assert.assertEquals(
|
||||||
"h1", dsAllocResp.getNodesForScheduling().get(0).getNodeId().getHost());
|
"h1", dsAllocResp.getNodesForScheduling().get(0).getNodeId().getHost());
|
||||||
|
Assert.assertEquals(
|
||||||
|
"l1", dsAllocResp.getNodesForScheduling().get(1).getNodePartition());
|
||||||
|
|
||||||
FinishApplicationMasterResponse dsfinishResp =
|
FinishApplicationMasterResponse dsfinishResp =
|
||||||
new FinishApplicationMasterResponsePBImpl(
|
new FinishApplicationMasterResponsePBImpl(
|
||||||
|
@ -1004,9 +1006,13 @@ public class TestOpportunisticContainerAllocatorAMService {
|
||||||
.getExecutionTypeRequest().getEnforceExecutionType());
|
.getExecutionTypeRequest().getEnforceExecutionType());
|
||||||
DistributedSchedulingAllocateResponse resp = factory
|
DistributedSchedulingAllocateResponse resp = factory
|
||||||
.newRecordInstance(DistributedSchedulingAllocateResponse.class);
|
.newRecordInstance(DistributedSchedulingAllocateResponse.class);
|
||||||
|
RemoteNode remoteNode1 = RemoteNode.newInstance(
|
||||||
|
NodeId.newInstance("h1", 1234), "http://h1:4321");
|
||||||
|
RemoteNode remoteNode2 = RemoteNode.newInstance(
|
||||||
|
NodeId.newInstance("h2", 1234), "http://h2:4321");
|
||||||
|
remoteNode2.setNodePartition("l1");
|
||||||
resp.setNodesForScheduling(
|
resp.setNodesForScheduling(
|
||||||
Arrays.asList(RemoteNode.newInstance(
|
Arrays.asList(remoteNode1, remoteNode2));
|
||||||
NodeId.newInstance("h1", 1234), "http://h1:4321")));
|
|
||||||
return resp;
|
return resp;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue