YARN-4811. Generate histograms in ContainerMetrics for actual container resource usage

This commit is contained in:
Jian He 2016-03-31 14:28:13 -07:00
parent 7a021471c3
commit 0dd9bcab97
6 changed files with 170 additions and 3 deletions

View File

@ -31,6 +31,7 @@
import org.apache.hadoop.metrics2.MetricsInfo;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.util.Quantile;
import org.apache.hadoop.metrics2.util.QuantileEstimator;
import org.apache.hadoop.metrics2.util.SampleQuantiles;
import com.google.common.annotations.VisibleForTesting;
@ -54,7 +55,7 @@ public class MutableQuantiles extends MutableMetric {
private final MetricsInfo[] quantileInfos;
private final int interval;
private SampleQuantiles estimator;
private QuantileEstimator estimator;
private long previousCount = 0;
@VisibleForTesting
@ -134,6 +135,10 @@ public int getInterval() {
return interval;
}
public synchronized void setEstimator(QuantileEstimator quantileEstimator) {
this.estimator = quantileEstimator;
}
/**
* Runnable used to periodically roll over the internal
* {@link SampleQuantiles} every interval.

View File

@ -0,0 +1,32 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metrics2.util;
import java.util.Map;
public interface QuantileEstimator {
void insert(long value);
Map<Quantile, Long> snapshot();
long getCount();
void clear();
}

View File

@ -47,7 +47,7 @@
*
*/
@InterfaceAudience.Private
public class SampleQuantiles {
public class SampleQuantiles implements QuantileEstimator {
/**
* Total number of items in stream

View File

@ -117,6 +117,11 @@
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
</dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-core</artifactId>
</dependency>
<!--
junit must be before mockito-all on the classpath. mockito-all bundles its
own copy of the hamcrest classes, but they don't match our junit version.

View File

@ -18,6 +18,9 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsInfo;
@ -29,13 +32,17 @@
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableStat;
import org.apache.hadoop.metrics2.util.Quantile;
import org.apache.hadoop.metrics2.util.QuantileEstimator;
import org.apache.hadoop.yarn.api.records.ContainerId;
import java.util.HashMap;
import java.util.Map;
import java.util.Timer;
import java.util.TimerTask;
import java.util.TreeMap;
import static org.apache.hadoop.metrics2.lib.Interns.info;
@ -47,10 +54,13 @@ public class ContainerMetrics implements MetricsSource {
public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs";
public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit";
public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs";
public static final String PMEM_USAGE_QUANTILES_NAME = "pMemUsageMBHistogram";
public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs";
public static final String LOCALIZATION_DURATION_METRIC_NAME =
"localizationDurationMs";
private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent";
private static final String PHY_CPU_USAGE_QUANTILES_NAME =
"pCpuUsagePercentHistogram";
// Use a multiplier of 1000 to avoid losing too much precision when
// converting to integers
@ -59,6 +69,9 @@ public class ContainerMetrics implements MetricsSource {
@Metric
public MutableStat pMemMBsStat;
@Metric
public MutableQuantiles pMemMBQuantiles;
// This tracks overall CPU percentage of the machine in terms of percentage
// of 1 core similar to top
// Thus if you use 2 cores completely out of 4 available cores this value
@ -66,6 +79,9 @@ public class ContainerMetrics implements MetricsSource {
@Metric
public MutableStat cpuCoreUsagePercent;
@Metric
public MutableQuantiles cpuCoreUsagePercentQuantiles;
@Metric
public MutableStat milliVcoresUsed;
@ -127,9 +143,23 @@ public class ContainerMetrics implements MetricsSource {
this.pMemMBsStat = registry.newStat(
PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true);
this.pMemMBQuantiles = registry
.newQuantiles(PMEM_USAGE_QUANTILES_NAME, "Physical memory quantiles",
"Usage", "MBs", 1);
ContainerMetricsQuantiles memEstimator =
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
pMemMBQuantiles.setEstimator(memEstimator);
this.cpuCoreUsagePercent = registry.newStat(
PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats",
"Usage", "Percents", true);
this.cpuCoreUsagePercentQuantiles = registry
.newQuantiles(PHY_CPU_USAGE_QUANTILES_NAME,
"Physical Cpu core percent usage quantiles", "Usage", "Percents",
1);
ContainerMetricsQuantiles cpuEstimator =
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
cpuCoreUsagePercentQuantiles.setEstimator(cpuEstimator);
this.milliVcoresUsed = registry.newStat(
VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage",
"MilliVcores", true);
@ -216,6 +246,7 @@ public synchronized void finished() {
public void recordMemoryUsage(int memoryMBs) {
if (memoryMBs >= 0) {
this.pMemMBsStat.add(memoryMBs);
this.pMemMBQuantiles.add(memoryMBs);
}
}
@ -223,6 +254,7 @@ public void recordCpuUsage(
int totalPhysicalCpuPercent, int milliVcoresUsed) {
if (totalPhysicalCpuPercent >=0) {
this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent);
this.cpuCoreUsagePercentQuantiles.add(totalPhysicalCpuPercent);
}
if (milliVcoresUsed >= 0) {
this.milliVcoresUsed.add(milliVcoresUsed);
@ -274,4 +306,41 @@ public void run() {
};
unregisterContainerMetricsTimer.schedule(timerTask, unregisterDelayMs);
}
public static class ContainerMetricsQuantiles implements QuantileEstimator {
private final Histogram histogram = new Histogram(new UniformReservoir());
private Quantile[] quantiles;
ContainerMetricsQuantiles(Quantile[] q) {
quantiles = q;
}
@Override
public synchronized void insert(long value) {
histogram.update(value);
}
@Override
synchronized public long getCount() {
return histogram.getCount();
}
@Override
synchronized public void clear() {
// don't do anything because we want metrics over the lifetime of the
// container
}
@Override
public synchronized Map<Quantile, Long> snapshot() {
Snapshot snapshot = histogram.getSnapshot();
Map<Quantile, Long> values = new TreeMap<>();
for (Quantile quantile : quantiles) {
values.put(quantile, (long) snapshot.getValue(quantile.quantile));
}
return values;
}
}
}

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricsRecord;
import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
@ -26,10 +27,15 @@
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.junit.Assert;
import org.junit.Test;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString;
@ -158,4 +164,54 @@ public void testContainerMetricsFinished() throws InterruptedException {
system, containerId3, 1, 0));
system.shutdown();
}
/**
* Run a test to submit values for actual memory usage and see if the
* histogram comes out correctly.
* @throws Exception
*/
@Test
public void testContainerMetricsHistogram() throws Exception {
// submit 2 values - 1024 and 2048. 75th, 90th, 95th and 99th percentiles
// will be 2048. 50th percentile will be 1536((1024+2048)/2)
// if we keep recording 1024 and 2048 in a loop, the 50th percentile
// will tend closer to 2048
Map<String, Long> expectedValues = new HashMap<>();
expectedValues.put("PMemUsageMBHistogram50thPercentileMBs", 1536L);
expectedValues.put("PMemUsageMBHistogram75thPercentileMBs", 2048L);
expectedValues.put("PMemUsageMBHistogram90thPercentileMBs", 2048L);
expectedValues.put("PMemUsageMBHistogram95thPercentileMBs", 2048L);
expectedValues.put("PMemUsageMBHistogram99thPercentileMBs", 2048L);
expectedValues.put("PCpuUsagePercentHistogram50thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram75thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram90thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram95thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram99thPercentilePercents", 0L);
Set<String> testResults = new HashSet<>();
int delay = 10;
int rolloverDelay = 1000;
MetricsCollectorImpl collector = new MetricsCollectorImpl();
ContainerId containerId = mock(ContainerId.class);
ContainerMetrics metrics =
ContainerMetrics.forContainer(containerId, delay, 0);
metrics.recordMemoryUsage(1024);
metrics.recordMemoryUsage(2048);
Thread.sleep(rolloverDelay + 10);
metrics.getMetrics(collector, true);
for (MetricsRecord record : collector.getRecords()) {
for (AbstractMetric metric : record.metrics()) {
String metricName = metric.name();
if (expectedValues.containsKey(metricName)) {
Long expectedValue = expectedValues.get(metricName);
Assert.assertEquals(
"Metric " + metricName + " doesn't have expected value",
expectedValue, metric.value());
testResults.add(metricName);
}
}
}
Assert.assertEquals(expectedValues.keySet(), testResults);
}
}