YARN-10713. ClusterMetrics should support custom resource capacity related metrics. Contributed by Qi Zhu.
This commit is contained in:
parent
af1f9f43ea
commit
19e418c10d
|
@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager;
|
||||||
|
|
||||||
import static org.apache.hadoop.metrics2.lib.Interns.info;
|
import static org.apache.hadoop.metrics2.lib.Interns.info;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
@ -35,6 +36,9 @@ import org.apache.hadoop.metrics2.lib.MutableRate;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
|
import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting;
|
||||||
import org.apache.hadoop.yarn.api.records.ResourceInformation;
|
import org.apache.hadoop.yarn.api.records.ResourceInformation;
|
||||||
|
import org.apache.hadoop.yarn.metrics.CustomResourceMetricValue;
|
||||||
|
import org.apache.hadoop.yarn.metrics.CustomResourceMetrics;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetricsForCustomResources;
|
||||||
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
|
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
|
||||||
|
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
|
@ -58,11 +62,20 @@ public class ClusterMetrics {
|
||||||
@Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores;
|
@Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores;
|
||||||
@Metric("Memory Capability") MutableGaugeLong capabilityMB;
|
@Metric("Memory Capability") MutableGaugeLong capabilityMB;
|
||||||
@Metric("Vcore Capability") MutableGaugeLong capabilityVirtualCores;
|
@Metric("Vcore Capability") MutableGaugeLong capabilityVirtualCores;
|
||||||
@Metric("GPU Capability") MutableGaugeLong capabilityGPUs;
|
|
||||||
|
|
||||||
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
|
private static final MetricsInfo RECORD_INFO = info("ClusterMetrics",
|
||||||
"Metrics for the Yarn Cluster");
|
"Metrics for the Yarn Cluster");
|
||||||
|
|
||||||
|
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX =
|
||||||
|
"Capability.";
|
||||||
|
private static final String CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC =
|
||||||
|
"NAME Capability";
|
||||||
|
|
||||||
|
private static CustomResourceMetrics customResourceMetrics;
|
||||||
|
|
||||||
|
private final CustomResourceMetricValue customResourceCapability =
|
||||||
|
new CustomResourceMetricValue();
|
||||||
|
|
||||||
private static volatile ClusterMetrics INSTANCE = null;
|
private static volatile ClusterMetrics INSTANCE = null;
|
||||||
private static MetricsRegistry registry;
|
private static MetricsRegistry registry;
|
||||||
|
|
||||||
|
@ -86,6 +99,17 @@ public class ClusterMetrics {
|
||||||
if (ms != null) {
|
if (ms != null) {
|
||||||
ms.register("ClusterMetrics", "Metrics for the Yarn Cluster", INSTANCE);
|
ms.register("ClusterMetrics", "Metrics for the Yarn Cluster", INSTANCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ResourceUtils.getNumberOfKnownResourceTypes() > 2) {
|
||||||
|
customResourceMetrics =
|
||||||
|
new CustomResourceMetrics();
|
||||||
|
Map<String, Long> customResources =
|
||||||
|
customResourceMetrics.initAndGetCustomResources();
|
||||||
|
customResourceMetrics.
|
||||||
|
registerCustomResources(customResources,
|
||||||
|
registry, CUSTOM_RESOURCE_CAPABILITY_METRIC_PREFIX,
|
||||||
|
CUSTOM_RESOURCE_CAPABILITY_METRIC_DESC);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
@ -209,23 +233,20 @@ public class ClusterMetrics {
|
||||||
return capabilityVirtualCores.value();
|
return capabilityVirtualCores.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getCapabilityGPUs() {
|
public Map<String, Long> getCustomResourceCapability() {
|
||||||
if (capabilityGPUs == null) {
|
return customResourceCapability.getValues();
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return capabilityGPUs.value();
|
public void setCustomResourceCapability(Resource res) {
|
||||||
|
this.customResourceCapability.set(res);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void incrCapability(Resource res) {
|
public void incrCapability(Resource res) {
|
||||||
if (res != null) {
|
if (res != null) {
|
||||||
capabilityMB.incr(res.getMemorySize());
|
capabilityMB.incr(res.getMemorySize());
|
||||||
capabilityVirtualCores.incr(res.getVirtualCores());
|
capabilityVirtualCores.incr(res.getVirtualCores());
|
||||||
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
|
if (customResourceCapability != null) {
|
||||||
.get(ResourceInformation.GPU_URI);
|
customResourceCapability.increase(res);
|
||||||
if (gpuIndex != null) {
|
|
||||||
capabilityGPUs.incr(res.
|
|
||||||
getResourceValue(ResourceInformation.GPU_URI));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -234,11 +255,8 @@ public class ClusterMetrics {
|
||||||
if (res != null) {
|
if (res != null) {
|
||||||
capabilityMB.decr(res.getMemorySize());
|
capabilityMB.decr(res.getMemorySize());
|
||||||
capabilityVirtualCores.decr(res.getVirtualCores());
|
capabilityVirtualCores.decr(res.getVirtualCores());
|
||||||
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
|
if (customResourceCapability != null) {
|
||||||
.get(ResourceInformation.GPU_URI);
|
customResourceCapability.decrease(res);
|
||||||
if (gpuIndex != null) {
|
|
||||||
capabilityGPUs.decr(res.
|
|
||||||
getResourceValue(ResourceInformation.GPU_URI));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -231,7 +231,8 @@ public class TestCSAllocateCustomResource {
|
||||||
assertEquals("Cluster Capability Vcores incorrect",
|
assertEquals("Cluster Capability Vcores incorrect",
|
||||||
metrics.getCapabilityVirtualCores(), 4 * 8);
|
metrics.getCapabilityVirtualCores(), 4 * 8);
|
||||||
assertEquals("Cluster Capability GPUs incorrect",
|
assertEquals("Cluster Capability GPUs incorrect",
|
||||||
metrics.getCapabilityGPUs(), 4 * 8);
|
(metrics.getCustomResourceCapability()
|
||||||
|
.get(GPU_URI)).longValue(), 4 * 8);
|
||||||
|
|
||||||
for (RMNode rmNode : rmNodes) {
|
for (RMNode rmNode : rmNodes) {
|
||||||
nodeTracker.removeNode(rmNode.getNodeID());
|
nodeTracker.removeNode(rmNode.getNodeID());
|
||||||
|
@ -243,7 +244,8 @@ public class TestCSAllocateCustomResource {
|
||||||
assertEquals("Cluster Capability Vcores incorrect",
|
assertEquals("Cluster Capability Vcores incorrect",
|
||||||
metrics.getCapabilityVirtualCores(), 0);
|
metrics.getCapabilityVirtualCores(), 0);
|
||||||
assertEquals("Cluster Capability GPUs incorrect",
|
assertEquals("Cluster Capability GPUs incorrect",
|
||||||
metrics.getCapabilityGPUs(), 0);
|
(metrics.getCustomResourceCapability()
|
||||||
|
.get(GPU_URI)).longValue(), 0);
|
||||||
ClusterMetrics.destroy();
|
ClusterMetrics.destroy();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue