YARN-11220. [Federation] Add getLabelsToNodes, getClusterNodeLabels, getLabelsOnNode REST APIs for Router (#4657)

This commit is contained in:
slfan1989 2022-08-03 03:09:55 +08:00 committed by GitHub
parent 123d1aa884
commit 57da4bb0a1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 299 additions and 7 deletions

View File

@ -24,6 +24,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.Collection;
import java.util.HashSet;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
@ -58,6 +61,11 @@ public class NodeIDsInfo {
this.partitionInfo = new PartitionInfo(new ResourceInfo(resource));
}
/**
 * Constructs a NodeIDsInfo from an existing collection of node ids and an
 * already-aggregated partition summary.
 *
 * @param nodeIdsList node identifiers copied into this object.
 * @param partitionInfo aggregated partition resource information.
 */
public NodeIDsInfo(Collection<String> nodeIdsList, PartitionInfo partitionInfo) {
  this.partitionInfo = partitionInfo;
  this.nodeIDsList.addAll(nodeIdsList);
}
/** @return the list of node identifiers held by this object. */
public ArrayList<String> getNodeIDs() {
  return this.nodeIDsList;
}
@ -65,4 +73,36 @@ public class NodeIDsInfo {
/** @return the aggregated partition resource information, may be null. */
public PartitionInfo getPartitionInfo() {
  return this.partitionInfo;
}
/**
 * Merges two NodeIDsInfo objects into a new one. The node lists are
 * unioned (duplicates removed) and the two PartitionInfo objects are
 * combined via {@link PartitionInfo#addTo}.
 *
 * @param left left NodeIDsInfo object, may be null.
 * @param right right NodeIDsInfo object, may be null.
 * @return a new NodeIDsInfo holding the merged node set and partition info.
 */
public static NodeIDsInfo add(NodeIDsInfo left, NodeIDsInfo right) {
  Set<String> mergedNodes = new HashSet<>();
  PartitionInfo leftPartition = null;
  if (left != null) {
    if (left.nodeIDsList != null) {
      mergedNodes.addAll(left.nodeIDsList);
    }
    leftPartition = left.getPartitionInfo();
  }
  PartitionInfo rightPartition = null;
  if (right != null) {
    if (right.nodeIDsList != null) {
      mergedNodes.addAll(right.nodeIDsList);
    }
    rightPartition = right.getPartitionInfo();
  }
  PartitionInfo mergedPartition = PartitionInfo.addTo(leftPartition, rightPartition);
  return new NodeIDsInfo(mergedNodes, mergedPartition);
}
}

View File

@ -31,8 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo;
@XmlAccessorType(XmlAccessType.FIELD)
public class LabelsToNodesInfo {
protected Map<NodeLabelInfo, NodeIDsInfo> labelsToNodes =
new HashMap<NodeLabelInfo, NodeIDsInfo>();
private Map<NodeLabelInfo, NodeIDsInfo> labelsToNodes = new HashMap<>();
/** Default constructor — JAXB needs this. */
public LabelsToNodesInfo() {
}
@ -44,4 +43,8 @@ public class LabelsToNodesInfo {
/** @return the mapping from node labels to the nodes carrying them. */
public Map<NodeLabelInfo, NodeIDsInfo> getLabelsToNodes() {
  return this.labelsToNodes;
}
/** @param labelsToNodes replacement label-to-nodes mapping. */
public void setLabelsToNodes(Map<NodeLabelInfo, NodeIDsInfo> labelsToNodes) {
  this.labelsToNodes = labelsToNodes;
}
}

View File

@ -18,6 +18,9 @@
package org.apache.hadoop.yarn.server.resourcemanager.webapp.dao;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.util.resource.Resources;
import javax.xml.bind.annotation.XmlAccessType;
import javax.xml.bind.annotation.XmlAccessorType;
import javax.xml.bind.annotation.XmlElement;
@ -43,4 +46,29 @@ public class PartitionInfo {
/** @return the resource available in this partition, may be null. */
public ResourceInfo getResourceAvailable() {
  return this.resourceAvailable;
}
/**
 * Combines two PartitionInfo objects into a new one by summing their
 * available resources.
 *
 * <p>Neither argument is modified: the sum is computed via
 * {@code Resources.add}, which clones its left operand. (Using
 * {@code Resources.addTo} here would mutate the caller's live Resource
 * inside {@code left}'s ResourceInfo, corrupting the input.)
 *
 * @param left left PartitionInfo object, may be null.
 * @param right right PartitionInfo object, may be null.
 * @return a new PartitionInfo whose available resource is the sum.
 */
public static PartitionInfo addTo(PartitionInfo left, PartitionInfo right) {
  Resource leftResource = Resource.newInstance(0, 0);
  if (left != null && left.getResourceAvailable() != null) {
    leftResource = left.getResourceAvailable().getResource();
  }
  Resource rightResource = Resource.newInstance(0, 0);
  if (right != null && right.getResourceAvailable() != null) {
    rightResource = right.getResourceAvailable().getResource();
  }
  // Resources.add returns a fresh Resource; inputs are left untouched.
  Resource resource = Resources.add(leftResource, rightResource);
  return new PartitionInfo(new ResourceInfo(resource));
}
}

View File

@ -25,6 +25,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
* Common utility methods used by the Router server.
*
@ -60,4 +62,24 @@ public final class RouterServerUtil {
}
}
/**
 * Logs an error message and rethrows it as an IOException, preserving the
 * original cause when one is supplied.
 *
 * @param errMsg the error message
 * @param t the throwable raised in the called class; may be null.
 * @throws IOException always, wrapping {@code t} when it is non-null.
 */
@Public
@Unstable
public static void logAndThrowIOException(String errMsg, Throwable t)
    throws IOException {
  if (t == null) {
    LOG.error(errMsg);
    throw new IOException(errMsg);
  }
  LOG.error(errMsg, t);
  throw new IOException(errMsg, t);
}
}

View File

@ -46,9 +46,11 @@ import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.authorize.AuthorizationException;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.Sets;
import org.apache.hadoop.util.concurrent.HadoopExecutors;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
@ -60,6 +62,7 @@ import org.apache.hadoop.yarn.server.federation.store.records.ApplicationHomeSub
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo;
import org.apache.hadoop.yarn.server.federation.utils.FederationStateStoreFacade;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.RMWebAppUtil;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ActivitiesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppActivitiesInfo;
@ -91,6 +94,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceOptionInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.BulkActivitiesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerTypeInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelInfo;
import org.apache.hadoop.yarn.server.router.RouterMetrics;
import org.apache.hadoop.yarn.server.router.RouterServerUtil;
import org.apache.hadoop.yarn.server.router.clientrm.ClientMethod;
@ -1161,7 +1165,32 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
/**
 * Returns the labels-to-nodes mapping aggregated across all active
 * sub-clusters. Per-label results are merged: node lists are unioned and
 * partition resources summed via {@link NodeIDsInfo#add}.
 *
 * @param labels node labels to filter on, forwarded to each sub-cluster.
 * @return the federated labels-to-nodes mapping.
 * @throws IOException if active sub-clusters cannot be retrieved or a
 *     sub-cluster invocation fails.
 */
@Override
public LabelsToNodesInfo getLabelsToNodes(Set<String> labels)
    throws IOException {
  try {
    Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
    Class[] argsClasses = new Class[]{Set.class};
    Object[] args = new Object[]{labels};
    ClientMethod remoteMethod = new ClientMethod("getLabelsToNodes", argsClasses, args);
    Map<SubClusterInfo, LabelsToNodesInfo> labelsToNodesInfoMap =
        invokeConcurrent(subClustersActive.values(), remoteMethod, LabelsToNodesInfo.class);
    Map<NodeLabelInfo, NodeIDsInfo> labelToNodesMap = new HashMap<>();
    labelsToNodesInfoMap.values().forEach(labelsToNode -> {
      Map<NodeLabelInfo, NodeIDsInfo> values = labelsToNode.getLabelsToNodes();
      for (Map.Entry<NodeLabelInfo, NodeIDsInfo> item : values.entrySet()) {
        NodeLabelInfo key = item.getKey();
        NodeIDsInfo leftValue = item.getValue();
        // First occurrence of a label merges against null; NodeIDsInfo.add
        // tolerates null on either side.
        NodeIDsInfo rightValue = labelToNodesMap.getOrDefault(key, null);
        labelToNodesMap.put(key, NodeIDsInfo.add(leftValue, rightValue));
      }
    });
    return new LabelsToNodesInfo(labelToNodesMap);
  } catch (NotFoundException e) {
    RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
  } catch (YarnException e) {
    RouterServerUtil.logAndThrowIOException("getLabelsToNodes error.", e);
  }
  return null;
}
@Override
@ -1179,7 +1208,23 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
/**
 * Returns the union of cluster node labels reported by every active
 * sub-cluster.
 *
 * @param hsr the servlet request; cloned before being fanned out.
 * @return the federated set of cluster node labels.
 * @throws IOException if active sub-clusters cannot be retrieved or a
 *     sub-cluster invocation fails.
 */
@Override
public NodeLabelsInfo getClusterNodeLabels(HttpServletRequest hsr)
    throws IOException {
  try {
    Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
    final HttpServletRequest hsrCopy = clone(hsr);
    Class[] argsClasses = new Class[]{HttpServletRequest.class};
    Object[] args = new Object[]{hsrCopy};
    ClientMethod remoteMethod = new ClientMethod("getClusterNodeLabels", argsClasses, args);
    Map<SubClusterInfo, NodeLabelsInfo> nodeToLabelsInfoMap =
        invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
    // Set union removes labels reported by more than one sub-cluster.
    Set<NodeLabel> hashSets = Sets.newHashSet();
    nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
    return new NodeLabelsInfo(hashSets);
  } catch (NotFoundException e) {
    RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
  } catch (YarnException e) {
    RouterServerUtil.logAndThrowIOException("getClusterNodeLabels error.", e);
  }
  return null;
}
@Override
@ -1197,7 +1242,23 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
/**
 * Returns the union of labels reported for the given node by every active
 * sub-cluster.
 *
 * @param hsr the servlet request; cloned before being fanned out.
 * @param nodeId the node whose labels are requested.
 * @return the federated set of labels on the node.
 * @throws IOException if active sub-clusters cannot be retrieved or a
 *     sub-cluster invocation fails.
 */
@Override
public NodeLabelsInfo getLabelsOnNode(HttpServletRequest hsr, String nodeId)
    throws IOException {
  try {
    Map<SubClusterId, SubClusterInfo> subClustersActive = getActiveSubclusters();
    final HttpServletRequest hsrCopy = clone(hsr);
    Class[] argsClasses = new Class[]{HttpServletRequest.class, String.class};
    Object[] args = new Object[]{hsrCopy, nodeId};
    ClientMethod remoteMethod = new ClientMethod("getLabelsOnNode", argsClasses, args);
    Map<SubClusterInfo, NodeLabelsInfo> nodeToLabelsInfoMap =
        invokeConcurrent(subClustersActive.values(), remoteMethod, NodeLabelsInfo.class);
    Set<NodeLabel> hashSets = Sets.newHashSet();
    nodeToLabelsInfoMap.values().forEach(item -> hashSets.addAll(item.getNodeLabels()));
    return new NodeLabelsInfo(hashSets);
  } catch (NotFoundException e) {
    RouterServerUtil.logAndThrowIOException("Get all active sub cluster(s) error.", e);
  } catch (YarnException e) {
    // Fixed copy-paste bug: message previously said "getClusterNodeLabels".
    RouterServerUtil.logAndThrowIOException("getLabelsOnNode error.", e);
  }
  return null;
}
@Override
@ -1396,7 +1457,7 @@ public class FederationInterceptorREST extends AbstractRESTRequestInterceptor {
R ret = clazz.cast(retObj);
return ret;
} catch (Exception e) {
LOG.error("SubCluster %s failed to call %s method.",
LOG.error("SubCluster {} failed to call {} method.",
info.getSubClusterId(), request.getMethodName(), e);
return null;
}

View File

@ -20,10 +20,13 @@ package org.apache.hadoop.yarn.server.router.webapp;
import java.io.IOException;
import java.net.ConnectException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Set;
import java.util.Map;
import java.util.HashMap;
import java.util.Collections;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicInteger;
import javax.servlet.http.HttpServletRequest;
@ -33,6 +36,8 @@ import javax.ws.rs.core.Response;
import javax.ws.rs.core.Response.Status;
import org.apache.hadoop.security.authorize.AuthorizationException;
import org.apache.hadoop.util.Sets;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ContainerId;
@ -41,9 +46,13 @@ import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerReport;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppState;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ApplicationSubmissionContextInfo;
@ -55,7 +64,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceOptionInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainerInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
import org.apache.hadoop.yarn.webapp.NotFoundException;
@ -297,4 +306,61 @@ public class MockDefaultRequestInterceptorREST
nodeLabels.put("node2", gpuNode);
return new NodeToLabelsInfo(nodeLabels);
}
/**
 * Mock implementation returning a fixed mapping of three labels
 * (x, y, z), each carrying two hosts and a distinct resource total, so
 * federation-side merging can be verified deterministically.
 */
@Override
public LabelsToNodesInfo getLabelsToNodes(Set<String> labels) throws IOException {
  if (!isRunning) {
    throw new RuntimeException("RM is stopped");
  }
  Map<NodeLabelInfo, NodeIDsInfo> labelsToNodes = new HashMap<>();

  // Label "x": 2 hosts, <20GB, 10 vcores>.
  NodeLabelInfo nodeLabelInfoX = new NodeLabelInfo(NodeLabel.newInstance("x", false));
  ArrayList<String> hostsX = new ArrayList<>(Arrays.asList("host1A", "host1B"));
  labelsToNodes.put(nodeLabelInfoX,
      new NodeIDsInfo(hostsX, Resource.newInstance(20 * 1024, 10)));

  // Label "y": 2 hosts, <40GB, 20 vcores>.
  NodeLabelInfo nodeLabelInfoY = new NodeLabelInfo(NodeLabel.newInstance("y", false));
  ArrayList<String> hostsY = new ArrayList<>(Arrays.asList("host2A", "host2B"));
  labelsToNodes.put(nodeLabelInfoY,
      new NodeIDsInfo(hostsY, Resource.newInstance(40 * 1024, 20)));

  // Label "z": 2 hosts, <80GB, 40 vcores>.
  NodeLabelInfo nodeLabelInfoZ = new NodeLabelInfo(NodeLabel.newInstance("z", false));
  ArrayList<String> hostsZ = new ArrayList<>(Arrays.asList("host3A", "host3B"));
  labelsToNodes.put(nodeLabelInfoZ,
      new NodeIDsInfo(hostsZ, Resource.newInstance(80 * 1024, 40)));

  return new LabelsToNodesInfo(labelsToNodes);
}
/**
 * Mock implementation returning two fixed cluster-level labels
 * ("cpu" and "gpu", both non-exclusive).
 */
@Override
public NodeLabelsInfo getClusterNodeLabels(HttpServletRequest hsr) throws IOException {
  if (!isRunning) {
    throw new RuntimeException("RM is stopped");
  }
  return new NodeLabelsInfo(Sets.newHashSet(
      NodeLabel.newInstance("cpu", false),
      NodeLabel.newInstance("gpu", false)));
}
/**
 * Mock implementation: only "node1" (case-insensitive) is known and
 * carries labels "x" and "y"; any other node id yields null.
 */
@Override
public NodeLabelsInfo getLabelsOnNode(HttpServletRequest hsr, String nodeId) throws IOException {
  if (!isRunning) {
    throw new RuntimeException("RM is stopped");
  }
  if (!StringUtils.equalsIgnoreCase(nodeId, "node1")) {
    return null;
  }
  NodeLabel labelX = NodeLabel.newInstance("x", false);
  NodeLabel labelY = NodeLabel.newInstance("y", false);
  return new NodeLabelsInfo(Sets.newHashSet(labelX, labelY));
}
}

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.HashMap;
import java.util.Map;
import javax.ws.rs.core.Response;
@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceOption;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.NodeLabel;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.server.federation.policies.manager.UniformBroadcastPolicyManager;
@ -52,6 +54,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceOptionInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeToLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeLabelInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.LabelsToNodesInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.NodeIDsInfo;
import org.apache.hadoop.yarn.server.webapp.dao.ContainersInfo;
import org.apache.hadoop.yarn.util.MonotonicClock;
import org.junit.Assert;
@ -626,4 +631,71 @@ public class TestFederationInterceptorREST extends BaseRouterWebServicesTest {
Assert.assertEquals(1, node2Value.getNodeLabelsName().size());
Assert.assertEquals("GPU", node2Value.getNodeLabelsName().get(0));
}
@Test
public void testGetLabelsToNodes() throws Exception {
  LabelsToNodesInfo labelsToNodesInfo = interceptor.getLabelsToNodes(null);
  Map<NodeLabelInfo, NodeIDsInfo> map = labelsToNodesInfo.getLabelsToNodes();
  Assert.assertNotNull(map);
  Assert.assertEquals(3, map.size());

  // Label "x": each of the 4 mocked sub-clusters reports the same 2 hosts
  // and <20GB, 10 vcores>; hosts dedupe while resources accumulate 4x.
  NodeLabelInfo nodeLabelInfoX =
      new NodeLabelInfo(NodeLabel.newInstance("x", false));
  NodeIDsInfo nodeIDsInfoX = map.get(nodeLabelInfoX);
  Assert.assertNotNull(nodeIDsInfoX);
  Assert.assertEquals(2, nodeIDsInfoX.getNodeIDs().size());
  Resource resourceX =
      nodeIDsInfoX.getPartitionInfo().getResourceAvailable().getResource();
  Assert.assertNotNull(resourceX);
  Assert.assertEquals(4 * 10, resourceX.getVirtualCores());
  Assert.assertEquals(4 * 20 * 1024, resourceX.getMemorySize());

  // Label "y": 2 deduped hosts, 4x <40GB, 20 vcores>.
  NodeLabelInfo nodeLabelInfoY =
      new NodeLabelInfo(NodeLabel.newInstance("y", false));
  NodeIDsInfo nodeIDsInfoY = map.get(nodeLabelInfoY);
  Assert.assertNotNull(nodeIDsInfoY);
  Assert.assertEquals(2, nodeIDsInfoY.getNodeIDs().size());
  Resource resourceY =
      nodeIDsInfoY.getPartitionInfo().getResourceAvailable().getResource();
  Assert.assertNotNull(resourceY);
  Assert.assertEquals(4 * 20, resourceY.getVirtualCores());
  Assert.assertEquals(4 * 40 * 1024, resourceY.getMemorySize());
}
@Test
public void testGetClusterNodeLabels() throws Exception {
  NodeLabelsInfo nodeLabelsInfo = interceptor.getClusterNodeLabels(null);
  Assert.assertNotNull(nodeLabelsInfo);
  Assert.assertEquals(2, nodeLabelsInfo.getNodeLabelsName().size());

  // The two mocked labels survive the federation-side set union.
  List<String> nodeLabelsName = nodeLabelsInfo.getNodeLabelsName();
  Assert.assertNotNull(nodeLabelsName);
  Assert.assertTrue(nodeLabelsName.contains("cpu"));
  Assert.assertTrue(nodeLabelsName.contains("gpu"));

  ArrayList<NodeLabelInfo> nodeLabelInfos = nodeLabelsInfo.getNodeLabelsInfo();
  Assert.assertNotNull(nodeLabelInfos);
  Assert.assertEquals(2, nodeLabelInfos.size());
  Assert.assertTrue(nodeLabelInfos.contains(new NodeLabelInfo("cpu", false)));
  Assert.assertTrue(nodeLabelInfos.contains(new NodeLabelInfo("gpu", false)));
}
@Test
public void testGetLabelsOnNode() throws Exception {
  // "node1" is known to the mock interceptors and carries labels x and y.
  NodeLabelsInfo nodeLabelsInfo = interceptor.getLabelsOnNode(null, "node1");
  Assert.assertNotNull(nodeLabelsInfo);
  Assert.assertEquals(2, nodeLabelsInfo.getNodeLabelsName().size());
  List<String> nodeLabelsName = nodeLabelsInfo.getNodeLabelsName();
  Assert.assertNotNull(nodeLabelsName);
  Assert.assertTrue(nodeLabelsName.contains("x"));
  Assert.assertTrue(nodeLabelsName.contains("y"));

  // Unknown node: sub-clusters return null, which aggregates to an empty
  // (but non-null) label set.
  NodeLabelsInfo nodeLabelsInfo2 = interceptor.getLabelsOnNode(null, "node2");
  Assert.assertNotNull(nodeLabelsInfo2);
  Assert.assertEquals(0, nodeLabelsInfo2.getNodeLabelsName().size());
}
}