YARN-10713. ClusterMetrics should support custom resource capacity related metrics. Contributed by Qi Zhu.

This commit is contained in:
Eric Badger 2021-03-25 22:33:58 +00:00
parent af1f9f43ea
commit 19e418c10d
2 changed files with 38 additions and 18 deletions

View File

@ -20,6 +20,7 @@
import static org.apache.hadoop.metrics2.lib.Interns.info; import static org.apache.hadoop.metrics2.lib.Interns.info;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
@ -35,6 +36,9 @@
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.metrics.CustomResourceMetricValue;
import org.apache.hadoop.yarn.metrics.CustomResourceMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsForCustomResources;
import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.resource.ResourceUtils;
@InterfaceAudience.Private @InterfaceAudience.Private
@ -58,11 +62,20 @@ public class ClusterMetrics {
@Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores; @Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores;
@Metric("Memory Capability") MutableGaugeLong capabilityMB; @Metric("Memory Capability") MutableGaugeLong capabilityMB;
@Metric("Vcore Capability") MutableGaugeLong capabilityVirtualCores; @Metric("Vcore Capability") MutableGaugeLong capabilityVirtualCores;
@Metric("GPU Capability") MutableGaugeLong capabilityGPUs;
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics", private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
"Metrics for the Yarn Cluster"); "Metrics for the Yarn Cluster");
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX =
"Capability.";
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC =
"NAME Capability";
private static CustomResourceMetrics customResourceMetrics;
private final CustomResourceMetricValue customResourceCapability =
new CustomResourceMetricValue();
private static volatile ClusterMetrics INSTANCE = null; private static volatile ClusterMetrics INSTANCE = null;
private static MetricsRegistry registry; private static MetricsRegistry registry;
@ -86,6 +99,17 @@ private static void registerMetrics() {
if (ms != null) { if (ms != null) {
ms.register("ClusterMetrics", "Metrics for the Yarn Cluster", INSTANCE); ms.register("ClusterMetrics", "Metrics for the Yarn Cluster", INSTANCE);
} }
if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) {
customResourceMetrics =
new CustomResourceMetrics();
Map<String, Long> customResources =
customResourceMetrics.initAndGetCustomResources();
customResourceMetrics.
registerCustomResources(customResources,
registry, CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX,
CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC);
}
} }
@VisibleForTesting @VisibleForTesting
@ -209,23 +233,20 @@ public long getCapabilityVirtualCores() {
return capabilityVirtualCores.value(); return capabilityVirtualCores.value();
} }
public long getCapabilityGPUs() { public Map<String, Long> getCustomResourceCapability() {
if (capabilityGPUs == null) { return customResourceCapability.getValues();
return 0;
} }
return capabilityGPUs.value(); public void setCustomResourceCapability(Resource res) {
this.customResourceCapability.set(res);
} }
public void incrCapability(Resource res) { public void incrCapability(Resource res) {
if (res != null) { if (res != null) {
capabilityMB.incr(res.getMemorySize()); capabilityMB.incr(res.getMemorySize());
capabilityVirtualCores.incr(res.getVirtualCores()); capabilityVirtualCores.incr(res.getVirtualCores());
Integer gpuIndex = ResourceUtils.getResourceTypeIndex() if (customResourceCapability != null) {
.get(ResourceInformation.GPU_URI); customResourceCapability.increase(res);
if (gpuIndex != null) {
capabilityGPUs.incr(res.
getResourceValue(ResourceInformation.GPU_URI));
} }
} }
} }
@ -234,11 +255,8 @@ public void decrCapability(Resource res) {
if (res != null) { if (res != null) {
capabilityMB.decr(res.getMemorySize()); capabilityMB.decr(res.getMemorySize());
capabilityVirtualCores.decr(res.getVirtualCores()); capabilityVirtualCores.decr(res.getVirtualCores());
Integer gpuIndex = ResourceUtils.getResourceTypeIndex() if (customResourceCapability != null) {
.get(ResourceInformation.GPU_URI); customResourceCapability.decrease(res);
if (gpuIndex != null) {
capabilityGPUs.decr(res.
getResourceValue(ResourceInformation.GPU_URI));
} }
} }
} }

View File

@ -231,7 +231,8 @@ public void testClusterMetricsWithGPU()
assertEquals("Cluster Capability Vcores incorrect", assertEquals("Cluster Capability Vcores incorrect",
metrics.getCapabilityVirtualCores(), 4 * 8); metrics.getCapabilityVirtualCores(), 4 * 8);
assertEquals("Cluster Capability GPUs incorrect", assertEquals("Cluster Capability GPUs incorrect",
metrics.getCapabilityGPUs(), 4 * 8); (metrics.getCustomResourceCapability()
.get(GPU_URI)).longValue(), 4 * 8);
for (RMNode rmNode : rmNodes) { for (RMNode rmNode : rmNodes) {
nodeTracker.removeNode(rmNode.getNodeID()); nodeTracker.removeNode(rmNode.getNodeID());
@ -243,7 +244,8 @@ public void testClusterMetricsWithGPU()
assertEquals("Cluster Capability Vcores incorrect", assertEquals("Cluster Capability Vcores incorrect",
metrics.getCapabilityVirtualCores(), 0); metrics.getCapabilityVirtualCores(), 0);
assertEquals("Cluster Capability GPUs incorrect", assertEquals("Cluster Capability GPUs incorrect",
metrics.getCapabilityGPUs(), 0); (metrics.getCustomResourceCapability()
.get(GPU_URI)).longValue(), 0);
ClusterMetrics.destroy(); ClusterMetrics.destroy();
} }
} }