YARN-10713. ClusterMetrics should support custom resource capacity related metrics. Contributed by Qi Zhu.

This commit is contained in:
Eric Badger 2021-03-25 22:33:58 +00:00
parent af1f9f43ea
commit 19e418c10d
2 changed files with 38 additions and 18 deletions

View File

@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager;
import static org.apache.hadoop.metrics2.lib.Interns.info;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.classification.InterfaceAudience;
@ -35,6 +36,9 @@ import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.metrics.CustomResourceMetricValue;
import org.apache.hadoop.yarn.metrics.CustomResourceMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsForCustomResources;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
@InterfaceAudience.Private
@ -58,11 +62,20 @@ public class ClusterMetrics {
@Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores;
@Metric("Memory Capability") MutableGaugeLong capabilityMB;
@Metric("Vcore Capability") MutableGaugeLong capabilityVirtualCores;
@Metric("GPU Capability") MutableGaugeLong capabilityGPUs;
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
"Metrics for the Yarn Cluster");
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX =
"Capability.";
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC =
"NAME Capability";
private static CustomResourceMetrics customResourceMetrics;
private final CustomResourceMetricValue customResourceCapability =
new CustomResourceMetricValue();
private static volatile ClusterMetrics INSTANCE = null;
private static MetricsRegistry registry;
@ -86,6 +99,17 @@ public class ClusterMetrics {
if (ms != null) {
ms.register("ClusterMetrics", "Metrics for the Yarn Cluster", INSTANCE);
}
if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) {
customResourceMetrics =
new CustomResourceMetrics();
Map<String, Long> customResources =
customResourceMetrics.initAndGetCustomResources();
customResourceMetrics.
registerCustomResources(customResources,
registry, CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX,
CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC);
}
}
@VisibleForTesting
@ -209,23 +233,20 @@ public class ClusterMetrics {
return capabilityVirtualCores.value();
}
public long getCapabilityGPUs() {
if (capabilityGPUs == null) {
return 0;
}
/**
 * Returns the custom resource capability values currently tracked by these
 * cluster metrics, as reported by the underlying
 * {@code CustomResourceMetricValue}.
 *
 * @return a map from a {@code String} key to a {@code Long} value;
 *         presumably keyed by custom resource name (e.g. the GPU resource
 *         URI) -- confirm against {@code CustomResourceMetricValue}
 */
public Map<String, Long> getCustomResourceCapability() {
  final Map<String, Long> capabilityValues =
      customResourceCapability.getValues();
  return capabilityValues;
}
return capabilityGPUs.value();
/**
 * Passes the given resource to the tracked custom resource capability
 * value via its {@code set} method.
 *
 * @param res the resource handed to {@code CustomResourceMetricValue#set};
 *            how its individual resource types are applied is decided by
 *            that class
 */
public void setCustomResourceCapability(Resource res) {
  final CustomResourceMetricValue capability = customResourceCapability;
  capability.set(res);
}
public void incrCapability(Resource res) {
if (res != null) {
capabilityMB.incr(res.getMemorySize());
capabilityVirtualCores.incr(res.getVirtualCores());
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
.get(ResourceInformation.GPU_URI);
if (gpuIndex != null) {
capabilityGPUs.incr(res.
getResourceValue(ResourceInformation.GPU_URI));
if (customResourceCapability != null) {
customResourceCapability.increase(res);
}
}
}
@ -234,11 +255,8 @@ public class ClusterMetrics {
if (res != null) {
capabilityMB.decr(res.getMemorySize());
capabilityVirtualCores.decr(res.getVirtualCores());
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
.get(ResourceInformation.GPU_URI);
if (gpuIndex != null) {
capabilityGPUs.decr(res.
getResourceValue(ResourceInformation.GPU_URI));
if (customResourceCapability != null) {
customResourceCapability.decrease(res);
}
}
}

View File

@ -231,7 +231,8 @@ public class TestCSAllocateCustomResource {
assertEquals("Cluster Capability Vcores incorrect",
metrics.getCapabilityVirtualCores(), 4 * 8);
assertEquals("Cluster Capability GPUs incorrect",
metrics.getCapabilityGPUs(), 4 * 8);
(metrics.getCustomResourceCapability()
.get(GPU_URI)).longValue(), 4 * 8);
for (RMNode rmNode : rmNodes) {
nodeTracker.removeNode(rmNode.getNodeID());
@ -243,7 +244,8 @@ public class TestCSAllocateCustomResource {
assertEquals("Cluster Capability Vcores incorrect",
metrics.getCapabilityVirtualCores(), 0);
assertEquals("Cluster Capability GPUs incorrect",
metrics.getCapabilityGPUs(), 0);
(metrics.getCustomResourceCapability()
.get(GPU_URI)).longValue(), 0);
ClusterMetrics.destroy();
}
}