YARN-4811. Generate histograms in ContainerMetrics for actual container resource usage
This commit is contained in:
parent
b91928cd26
commit
3afc2caec8
|
@ -31,6 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.metrics2.MetricsInfo;
|
import org.apache.hadoop.metrics2.MetricsInfo;
|
||||||
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
|
||||||
import org.apache.hadoop.metrics2.util.Quantile;
|
import org.apache.hadoop.metrics2.util.Quantile;
|
||||||
|
import org.apache.hadoop.metrics2.util.QuantileEstimator;
|
||||||
import org.apache.hadoop.metrics2.util.SampleQuantiles;
|
import org.apache.hadoop.metrics2.util.SampleQuantiles;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
@ -54,7 +55,7 @@ public class MutableQuantiles extends MutableMetric {
|
||||||
private final MetricsInfo[] quantileInfos;
|
private final MetricsInfo[] quantileInfos;
|
||||||
private final int interval;
|
private final int interval;
|
||||||
|
|
||||||
private SampleQuantiles estimator;
|
private QuantileEstimator estimator;
|
||||||
private long previousCount = 0;
|
private long previousCount = 0;
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
@ -134,6 +135,10 @@ public class MutableQuantiles extends MutableMetric {
|
||||||
return interval;
|
return interval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public synchronized void setEstimator(QuantileEstimator quantileEstimator) {
|
||||||
|
this.estimator = quantileEstimator;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Runnable used to periodically roll over the internal
|
* Runnable used to periodically roll over the internal
|
||||||
* {@link SampleQuantiles} every interval.
|
* {@link SampleQuantiles} every interval.
|
||||||
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.metrics2.util;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public interface QuantileEstimator {
|
||||||
|
|
||||||
|
void insert(long value);
|
||||||
|
|
||||||
|
Map<Quantile, Long> snapshot();
|
||||||
|
|
||||||
|
long getCount();
|
||||||
|
|
||||||
|
void clear();
|
||||||
|
}
|
|
@ -47,7 +47,7 @@ import com.google.common.base.Preconditions;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
@InterfaceAudience.Private
|
@InterfaceAudience.Private
|
||||||
public class SampleQuantiles {
|
public class SampleQuantiles implements QuantileEstimator {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Total number of items in stream
|
* Total number of items in stream
|
||||||
|
|
|
@ -117,6 +117,11 @@
|
||||||
<groupId>com.google.protobuf</groupId>
|
<groupId>com.google.protobuf</groupId>
|
||||||
<artifactId>protobuf-java</artifactId>
|
<artifactId>protobuf-java</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.codahale.metrics</groupId>
|
||||||
|
<artifactId>metrics-core</artifactId>
|
||||||
|
</dependency>
|
||||||
<!--
|
<!--
|
||||||
junit must be before mockito-all on the classpath. mockito-all bundles its
|
junit must be before mockito-all on the classpath. mockito-all bundles its
|
||||||
own copy of the hamcrest classes, but they don't match our junit version.
|
own copy of the hamcrest classes, but they don't match our junit version.
|
||||||
|
|
|
@ -18,6 +18,9 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
||||||
|
|
||||||
|
import com.codahale.metrics.Histogram;
|
||||||
|
import com.codahale.metrics.Snapshot;
|
||||||
|
import com.codahale.metrics.UniformReservoir;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.metrics2.MetricsCollector;
|
import org.apache.hadoop.metrics2.MetricsCollector;
|
||||||
import org.apache.hadoop.metrics2.MetricsInfo;
|
import org.apache.hadoop.metrics2.MetricsInfo;
|
||||||
|
@ -29,13 +32,17 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableStat;
|
import org.apache.hadoop.metrics2.lib.MutableStat;
|
||||||
|
import org.apache.hadoop.metrics2.util.Quantile;
|
||||||
|
import org.apache.hadoop.metrics2.util.QuantileEstimator;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Timer;
|
import java.util.Timer;
|
||||||
import java.util.TimerTask;
|
import java.util.TimerTask;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
import static org.apache.hadoop.metrics2.lib.Interns.info;
|
import static org.apache.hadoop.metrics2.lib.Interns.info;
|
||||||
|
|
||||||
|
@ -47,10 +54,13 @@ public class ContainerMetrics implements MetricsSource {
|
||||||
public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs";
|
public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs";
|
||||||
public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit";
|
public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit";
|
||||||
public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs";
|
public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs";
|
||||||
|
public static final String PMEM_USAGE_QUANTILES_NAME = "pMemUsageMBHistogram";
|
||||||
public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs";
|
public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs";
|
||||||
public static final String LOCALIZATION_DURATION_METRIC_NAME =
|
public static final String LOCALIZATION_DURATION_METRIC_NAME =
|
||||||
"localizationDurationMs";
|
"localizationDurationMs";
|
||||||
private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent";
|
private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent";
|
||||||
|
private static final String PHY_CPU_USAGE_QUANTILES_NAME =
|
||||||
|
"pCpuUsagePercentHistogram";
|
||||||
|
|
||||||
// Use a multiplier of 1000 to avoid losing too much precision when
|
// Use a multiplier of 1000 to avoid losing too much precision when
|
||||||
// converting to integers
|
// converting to integers
|
||||||
|
@ -59,6 +69,9 @@ public class ContainerMetrics implements MetricsSource {
|
||||||
@Metric
|
@Metric
|
||||||
public MutableStat pMemMBsStat;
|
public MutableStat pMemMBsStat;
|
||||||
|
|
||||||
|
@Metric
|
||||||
|
public MutableQuantiles pMemMBQuantiles;
|
||||||
|
|
||||||
// This tracks overall CPU percentage of the machine in terms of percentage
|
// This tracks overall CPU percentage of the machine in terms of percentage
|
||||||
// of 1 core similar to top
|
// of 1 core similar to top
|
||||||
// Thus if you use 2 cores completely out of 4 available cores this value
|
// Thus if you use 2 cores completely out of 4 available cores this value
|
||||||
|
@ -66,6 +79,9 @@ public class ContainerMetrics implements MetricsSource {
|
||||||
@Metric
|
@Metric
|
||||||
public MutableStat cpuCoreUsagePercent;
|
public MutableStat cpuCoreUsagePercent;
|
||||||
|
|
||||||
|
@Metric
|
||||||
|
public MutableQuantiles cpuCoreUsagePercentQuantiles;
|
||||||
|
|
||||||
@Metric
|
@Metric
|
||||||
public MutableStat milliVcoresUsed;
|
public MutableStat milliVcoresUsed;
|
||||||
|
|
||||||
|
@ -127,9 +143,23 @@ public class ContainerMetrics implements MetricsSource {
|
||||||
|
|
||||||
this.pMemMBsStat = registry.newStat(
|
this.pMemMBsStat = registry.newStat(
|
||||||
PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true);
|
PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true);
|
||||||
|
this.pMemMBQuantiles = registry
|
||||||
|
.newQuantiles(PMEM_USAGE_QUANTILES_NAME, "Physical memory quantiles",
|
||||||
|
"Usage", "MBs", 1);
|
||||||
|
ContainerMetricsQuantiles memEstimator =
|
||||||
|
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
|
||||||
|
pMemMBQuantiles.setEstimator(memEstimator);
|
||||||
|
|
||||||
this.cpuCoreUsagePercent = registry.newStat(
|
this.cpuCoreUsagePercent = registry.newStat(
|
||||||
PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats",
|
PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats",
|
||||||
"Usage", "Percents", true);
|
"Usage", "Percents", true);
|
||||||
|
this.cpuCoreUsagePercentQuantiles = registry
|
||||||
|
.newQuantiles(PHY_CPU_USAGE_QUANTILES_NAME,
|
||||||
|
"Physical Cpu core percent usage quantiles", "Usage", "Percents",
|
||||||
|
1);
|
||||||
|
ContainerMetricsQuantiles cpuEstimator =
|
||||||
|
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
|
||||||
|
cpuCoreUsagePercentQuantiles.setEstimator(cpuEstimator);
|
||||||
this.milliVcoresUsed = registry.newStat(
|
this.milliVcoresUsed = registry.newStat(
|
||||||
VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage",
|
VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage",
|
||||||
"MilliVcores", true);
|
"MilliVcores", true);
|
||||||
|
@ -216,6 +246,7 @@ public class ContainerMetrics implements MetricsSource {
|
||||||
public void recordMemoryUsage(int memoryMBs) {
|
public void recordMemoryUsage(int memoryMBs) {
|
||||||
if (memoryMBs >= 0) {
|
if (memoryMBs >= 0) {
|
||||||
this.pMemMBsStat.add(memoryMBs);
|
this.pMemMBsStat.add(memoryMBs);
|
||||||
|
this.pMemMBQuantiles.add(memoryMBs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -223,6 +254,7 @@ public class ContainerMetrics implements MetricsSource {
|
||||||
int totalPhysicalCpuPercent, int milliVcoresUsed) {
|
int totalPhysicalCpuPercent, int milliVcoresUsed) {
|
||||||
if (totalPhysicalCpuPercent >=0) {
|
if (totalPhysicalCpuPercent >=0) {
|
||||||
this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent);
|
this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent);
|
||||||
|
this.cpuCoreUsagePercentQuantiles.add(totalPhysicalCpuPercent);
|
||||||
}
|
}
|
||||||
if (milliVcoresUsed >= 0) {
|
if (milliVcoresUsed >= 0) {
|
||||||
this.milliVcoresUsed.add(milliVcoresUsed);
|
this.milliVcoresUsed.add(milliVcoresUsed);
|
||||||
|
@ -274,4 +306,41 @@ public class ContainerMetrics implements MetricsSource {
|
||||||
};
|
};
|
||||||
unregisterContainerMetricsTimer.schedule(timerTask, unregisterDelayMs);
|
unregisterContainerMetricsTimer.schedule(timerTask, unregisterDelayMs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static class ContainerMetricsQuantiles implements QuantileEstimator {
|
||||||
|
|
||||||
|
private final Histogram histogram = new Histogram(new UniformReservoir());
|
||||||
|
|
||||||
|
private Quantile[] quantiles;
|
||||||
|
|
||||||
|
ContainerMetricsQuantiles(Quantile[] q) {
|
||||||
|
quantiles = q;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void insert(long value) {
|
||||||
|
histogram.update(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
synchronized public long getCount() {
|
||||||
|
return histogram.getCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
synchronized public void clear() {
|
||||||
|
// don't do anything because we want metrics over the lifetime of the
|
||||||
|
// container
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized Map<Quantile, Long> snapshot() {
|
||||||
|
Snapshot snapshot = histogram.getSnapshot();
|
||||||
|
Map<Quantile, Long> values = new TreeMap<>();
|
||||||
|
for (Quantile quantile : quantiles) {
|
||||||
|
values.put(quantile, (long) snapshot.getValue(quantile.quantile));
|
||||||
|
}
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
|
|
||||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
|
||||||
|
|
||||||
|
import org.apache.hadoop.metrics2.AbstractMetric;
|
||||||
import org.apache.hadoop.metrics2.MetricsRecord;
|
import org.apache.hadoop.metrics2.MetricsRecord;
|
||||||
import org.apache.hadoop.metrics2.MetricsSystem;
|
import org.apache.hadoop.metrics2.MetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
|
import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
|
||||||
|
@ -26,10 +27,15 @@ import org.apache.hadoop.metrics2.impl.MetricsSystemImpl;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
import static org.junit.Assert.assertNotNull;
|
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
import static org.mockito.Matchers.any;
|
import static org.mockito.Matchers.any;
|
||||||
import static org.mockito.Matchers.anyString;
|
import static org.mockito.Matchers.anyString;
|
||||||
|
@ -158,4 +164,54 @@ public class TestContainerMetrics {
|
||||||
system, containerId3, 1, 0));
|
system, containerId3, 1, 0));
|
||||||
system.shutdown();
|
system.shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run a test to submit values for actual memory usage and see if the
|
||||||
|
* histogram comes out correctly.
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testContainerMetricsHistogram() throws Exception {
|
||||||
|
|
||||||
|
// submit 2 values - 1024 and 2048. 75th, 90th, 95th and 99th percentiles
|
||||||
|
// will be 2048. 50th percentile will be 1536((1024+2048)/2)
|
||||||
|
// if we keep recording 1024 and 2048 in a loop, the 50th percentile
|
||||||
|
// will tend closer to 2048
|
||||||
|
Map<String, Long> expectedValues = new HashMap<>();
|
||||||
|
expectedValues.put("PMemUsageMBHistogram50thPercentileMBs", 1536L);
|
||||||
|
expectedValues.put("PMemUsageMBHistogram75thPercentileMBs", 2048L);
|
||||||
|
expectedValues.put("PMemUsageMBHistogram90thPercentileMBs", 2048L);
|
||||||
|
expectedValues.put("PMemUsageMBHistogram95thPercentileMBs", 2048L);
|
||||||
|
expectedValues.put("PMemUsageMBHistogram99thPercentileMBs", 2048L);
|
||||||
|
expectedValues.put("PCpuUsagePercentHistogram50thPercentilePercents", 0L);
|
||||||
|
expectedValues.put("PCpuUsagePercentHistogram75thPercentilePercents", 0L);
|
||||||
|
expectedValues.put("PCpuUsagePercentHistogram90thPercentilePercents", 0L);
|
||||||
|
expectedValues.put("PCpuUsagePercentHistogram95thPercentilePercents", 0L);
|
||||||
|
expectedValues.put("PCpuUsagePercentHistogram99thPercentilePercents", 0L);
|
||||||
|
Set<String> testResults = new HashSet<>();
|
||||||
|
int delay = 10;
|
||||||
|
int rolloverDelay = 1000;
|
||||||
|
MetricsCollectorImpl collector = new MetricsCollectorImpl();
|
||||||
|
ContainerId containerId = mock(ContainerId.class);
|
||||||
|
ContainerMetrics metrics =
|
||||||
|
ContainerMetrics.forContainer(containerId, delay, 0);
|
||||||
|
|
||||||
|
metrics.recordMemoryUsage(1024);
|
||||||
|
metrics.recordMemoryUsage(2048);
|
||||||
|
Thread.sleep(rolloverDelay + 10);
|
||||||
|
metrics.getMetrics(collector, true);
|
||||||
|
for (MetricsRecord record : collector.getRecords()) {
|
||||||
|
for (AbstractMetric metric : record.metrics()) {
|
||||||
|
String metricName = metric.name();
|
||||||
|
if (expectedValues.containsKey(metricName)) {
|
||||||
|
Long expectedValue = expectedValues.get(metricName);
|
||||||
|
Assert.assertEquals(
|
||||||
|
"Metric " + metricName + " doesn't have expected value",
|
||||||
|
expectedValue, metric.value());
|
||||||
|
testResults.add(metricName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Assert.assertEquals(expectedValues.keySet(), testResults);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue