YARN-4811. Generate histograms in ContainerMetrics for actual container resource usage

This commit is contained in:
Jian He 2016-03-31 14:28:13 -07:00
parent b91928cd26
commit 3afc2caec8
6 changed files with 170 additions and 3 deletions

View File

@ -31,6 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsInfo;
import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.util.Quantile; import org.apache.hadoop.metrics2.util.Quantile;
import org.apache.hadoop.metrics2.util.QuantileEstimator;
import org.apache.hadoop.metrics2.util.SampleQuantiles; import org.apache.hadoop.metrics2.util.SampleQuantiles;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@ -54,7 +55,7 @@ public class MutableQuantiles extends MutableMetric {
private final MetricsInfo[] quantileInfos; private final MetricsInfo[] quantileInfos;
private final int interval; private final int interval;
private SampleQuantiles estimator; private QuantileEstimator estimator;
private long previousCount = 0; private long previousCount = 0;
@VisibleForTesting @VisibleForTesting
@ -134,6 +135,10 @@ public class MutableQuantiles extends MutableMetric {
return interval; return interval;
} }
public synchronized void setEstimator(QuantileEstimator quantileEstimator) {
this.estimator = quantileEstimator;
}
/** /**
* Runnable used to periodically roll over the internal * Runnable used to periodically roll over the internal
* {@link SampleQuantiles} every interval. * {@link SampleQuantiles} every interval.

View File

@ -0,0 +1,32 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.metrics2.util;
import java.util.Map;
public interface QuantileEstimator {
void insert(long value);
Map<Quantile, Long> snapshot();
long getCount();
void clear();
}

View File

@ -47,7 +47,7 @@ import com.google.common.base.Preconditions;
* *
*/ */
@InterfaceAudience.Private @InterfaceAudience.Private
public class SampleQuantiles { public class SampleQuantiles implements QuantileEstimator {
/** /**
* Total number of items in stream * Total number of items in stream

View File

@ -117,6 +117,11 @@
<groupId>com.google.protobuf</groupId> <groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId> <artifactId>protobuf-java</artifactId>
</dependency> </dependency>
<dependency>
<groupId>com.codahale.metrics</groupId>
<artifactId>metrics-core</artifactId>
</dependency>
<!-- <!--
junit must be before mockito-all on the classpath. mockito-all bundles its junit must be before mockito-all on the classpath. mockito-all bundles its
own copy of the hamcrest classes, but they don't match our junit version. own copy of the hamcrest classes, but they don't match our junit version.

View File

@ -18,6 +18,9 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Snapshot;
import com.codahale.metrics.UniformReservoir;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.metrics2.MetricsCollector; import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsInfo; import org.apache.hadoop.metrics2.MetricsInfo;
@ -29,13 +32,17 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
import org.apache.hadoop.metrics2.lib.MutableQuantiles;
import org.apache.hadoop.metrics2.lib.MutableStat; import org.apache.hadoop.metrics2.lib.MutableStat;
import org.apache.hadoop.metrics2.util.Quantile;
import org.apache.hadoop.metrics2.util.QuantileEstimator;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Timer; import java.util.Timer;
import java.util.TimerTask; import java.util.TimerTask;
import java.util.TreeMap;
import static org.apache.hadoop.metrics2.lib.Interns.info; import static org.apache.hadoop.metrics2.lib.Interns.info;
@ -47,10 +54,13 @@ public class ContainerMetrics implements MetricsSource {
public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs"; public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs";
public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit"; public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit";
public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs"; public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs";
public static final String PMEM_USAGE_QUANTILES_NAME = "pMemUsageMBHistogram";
public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs"; public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs";
public static final String LOCALIZATION_DURATION_METRIC_NAME = public static final String LOCALIZATION_DURATION_METRIC_NAME =
"localizationDurationMs"; "localizationDurationMs";
private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent"; private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent";
private static final String PHY_CPU_USAGE_QUANTILES_NAME =
"pCpuUsagePercentHistogram";
// Use a multiplier of 1000 to avoid losing too much precision when // Use a multiplier of 1000 to avoid losing too much precision when
// converting to integers // converting to integers
@ -59,6 +69,9 @@ public class ContainerMetrics implements MetricsSource {
@Metric @Metric
public MutableStat pMemMBsStat; public MutableStat pMemMBsStat;
@Metric
public MutableQuantiles pMemMBQuantiles;
// This tracks overall CPU percentage of the machine in terms of percentage // This tracks overall CPU percentage of the machine in terms of percentage
// of 1 core similar to top // of 1 core similar to top
// Thus if you use 2 cores completely out of 4 available cores this value // Thus if you use 2 cores completely out of 4 available cores this value
@ -66,6 +79,9 @@ public class ContainerMetrics implements MetricsSource {
@Metric @Metric
public MutableStat cpuCoreUsagePercent; public MutableStat cpuCoreUsagePercent;
@Metric
public MutableQuantiles cpuCoreUsagePercentQuantiles;
@Metric @Metric
public MutableStat milliVcoresUsed; public MutableStat milliVcoresUsed;
@ -127,9 +143,23 @@ public class ContainerMetrics implements MetricsSource {
this.pMemMBsStat = registry.newStat( this.pMemMBsStat = registry.newStat(
PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true); PMEM_USAGE_METRIC_NAME, "Physical memory stats", "Usage", "MBs", true);
this.pMemMBQuantiles = registry
.newQuantiles(PMEM_USAGE_QUANTILES_NAME, "Physical memory quantiles",
"Usage", "MBs", 1);
ContainerMetricsQuantiles memEstimator =
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
pMemMBQuantiles.setEstimator(memEstimator);
this.cpuCoreUsagePercent = registry.newStat( this.cpuCoreUsagePercent = registry.newStat(
PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats", PHY_CPU_USAGE_METRIC_NAME, "Physical Cpu core percent usage stats",
"Usage", "Percents", true); "Usage", "Percents", true);
this.cpuCoreUsagePercentQuantiles = registry
.newQuantiles(PHY_CPU_USAGE_QUANTILES_NAME,
"Physical Cpu core percent usage quantiles", "Usage", "Percents",
1);
ContainerMetricsQuantiles cpuEstimator =
new ContainerMetricsQuantiles(MutableQuantiles.quantiles);
cpuCoreUsagePercentQuantiles.setEstimator(cpuEstimator);
this.milliVcoresUsed = registry.newStat( this.milliVcoresUsed = registry.newStat(
VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage", VCORE_USAGE_METRIC_NAME, "1000 times Vcore usage", "Usage",
"MilliVcores", true); "MilliVcores", true);
@ -216,6 +246,7 @@ public class ContainerMetrics implements MetricsSource {
public void recordMemoryUsage(int memoryMBs) { public void recordMemoryUsage(int memoryMBs) {
if (memoryMBs >= 0) { if (memoryMBs >= 0) {
this.pMemMBsStat.add(memoryMBs); this.pMemMBsStat.add(memoryMBs);
this.pMemMBQuantiles.add(memoryMBs);
} }
} }
@ -223,6 +254,7 @@ public class ContainerMetrics implements MetricsSource {
int totalPhysicalCpuPercent, int milliVcoresUsed) { int totalPhysicalCpuPercent, int milliVcoresUsed) {
if (totalPhysicalCpuPercent >=0) { if (totalPhysicalCpuPercent >=0) {
this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent); this.cpuCoreUsagePercent.add(totalPhysicalCpuPercent);
this.cpuCoreUsagePercentQuantiles.add(totalPhysicalCpuPercent);
} }
if (milliVcoresUsed >= 0) { if (milliVcoresUsed >= 0) {
this.milliVcoresUsed.add(milliVcoresUsed); this.milliVcoresUsed.add(milliVcoresUsed);
@ -274,4 +306,41 @@ public class ContainerMetrics implements MetricsSource {
}; };
unregisterContainerMetricsTimer.schedule(timerTask, unregisterDelayMs); unregisterContainerMetricsTimer.schedule(timerTask, unregisterDelayMs);
} }
public static class ContainerMetricsQuantiles implements QuantileEstimator {
private final Histogram histogram = new Histogram(new UniformReservoir());
private Quantile[] quantiles;
ContainerMetricsQuantiles(Quantile[] q) {
quantiles = q;
}
@Override
public synchronized void insert(long value) {
histogram.update(value);
}
@Override
synchronized public long getCount() {
return histogram.getCount();
}
@Override
synchronized public void clear() {
// don't do anything because we want metrics over the lifetime of the
// container
}
@Override
public synchronized Map<Quantile, Long> snapshot() {
Snapshot snapshot = histogram.getSnapshot();
Map<Quantile, Long> values = new TreeMap<>();
for (Quantile quantile : quantiles) {
values.put(quantile, (long) snapshot.getValue(quantile.quantile));
}
return values;
}
}
} }

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.metrics2.AbstractMetric;
import org.apache.hadoop.metrics2.MetricsRecord; import org.apache.hadoop.metrics2.MetricsRecord;
import org.apache.hadoop.metrics2.MetricsSystem; import org.apache.hadoop.metrics2.MetricsSystem;
import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl; import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl;
@ -26,10 +27,15 @@ import org.apache.hadoop.metrics2.impl.MetricsSystemImpl;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.mockito.Matchers.any; import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyString; import static org.mockito.Matchers.anyString;
@ -158,4 +164,54 @@ public class TestContainerMetrics {
system, containerId3, 1, 0)); system, containerId3, 1, 0));
system.shutdown(); system.shutdown();
} }
/**
* Run a test to submit values for actual memory usage and see if the
* histogram comes out correctly.
* @throws Exception
*/
@Test
public void testContainerMetricsHistogram() throws Exception {
// submit 2 values - 1024 and 2048. 75th, 90th, 95th and 99th percentiles
// will be 2048. 50th percentile will be 1536((1024+2048)/2)
// if we keep recording 1024 and 2048 in a loop, the 50th percentile
// will tend closer to 2048
Map<String, Long> expectedValues = new HashMap<>();
expectedValues.put("PMemUsageMBHistogram50thPercentileMBs", 1536L);
expectedValues.put("PMemUsageMBHistogram75thPercentileMBs", 2048L);
expectedValues.put("PMemUsageMBHistogram90thPercentileMBs", 2048L);
expectedValues.put("PMemUsageMBHistogram95thPercentileMBs", 2048L);
expectedValues.put("PMemUsageMBHistogram99thPercentileMBs", 2048L);
expectedValues.put("PCpuUsagePercentHistogram50thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram75thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram90thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram95thPercentilePercents", 0L);
expectedValues.put("PCpuUsagePercentHistogram99thPercentilePercents", 0L);
Set<String> testResults = new HashSet<>();
int delay = 10;
int rolloverDelay = 1000;
MetricsCollectorImpl collector = new MetricsCollectorImpl();
ContainerId containerId = mock(ContainerId.class);
ContainerMetrics metrics =
ContainerMetrics.forContainer(containerId, delay, 0);
metrics.recordMemoryUsage(1024);
metrics.recordMemoryUsage(2048);
Thread.sleep(rolloverDelay + 10);
metrics.getMetrics(collector, true);
for (MetricsRecord record : collector.getRecords()) {
for (AbstractMetric metric : record.metrics()) {
String metricName = metric.name();
if (expectedValues.containsKey(metricName)) {
Long expectedValue = expectedValues.get(metricName);
Assert.assertEquals(
"Metric " + metricName + " doesn't have expected value",
expectedValue, metric.value());
testResults.add(metricName);
}
}
}
Assert.assertEquals(expectedValues.keySet(), testResults);
}
} }