YARN-3363. add localization and container launch time to ContainerMetrics at NM to show these timing information for each active container. (zxu via rkanter)

This commit is contained in:
Robert Kanter 2015-05-01 16:39:21 -07:00
parent d3d019c337
commit ac7d152901
6 changed files with 73 additions and 12 deletions

View File

@ -177,6 +177,10 @@ Release 2.8.0 - UNRELEASED
YARN-3406. Display count of running containers in the RM's Web UI.
(Ryu Kobayashi via ozawa)
YARN-3363. add localization and container launch time to ContainerMetrics
at NM to show these timing information for each active container.
(zxu via rkanter)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -96,6 +96,7 @@ public class ContainerImpl implements Container {
private int exitCode = ContainerExitStatus.INVALID;
private final StringBuilder diagnostics;
private boolean wasLaunched;
private long containerLocalizationStartTime;
private long containerLaunchStartTime;
private static Clock clock = new SystemClock();
@ -493,16 +494,21 @@ public class ContainerImpl implements Container {
// resource usage.
@SuppressWarnings("unchecked") // dispatcher not typed
private void sendContainerMonitorStartEvent() {
// Computes the container's memory/vcore limits and timing figures, then
// dispatches a ContainerStartMonitoringEvent carrying them.
// NOTE(review): the limit computation and the dispatch call each appear twice
// below (once with four event arguments, once with six). This looks like the
// pre- and post-change bodies shown together by a diff view without +/-
// markers -- confirm against the real file before editing.
// Physical memory limit scaled by 1024 * 1024 (presumably MB -> bytes; confirm
// the unit returned by getMemory()).
// NOTE(review): the first "* 1024" is evaluated before the long literal widens
// the product; if getMemory() returns int this may overflow for very large
// values -- confirm.
long pmemBytes = getResource().getMemory() * 1024 * 1024L;
float pmemRatio = daemonConf.getFloat(
YarnConfiguration.NM_VMEM_PMEM_RATIO,
YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
// Virtual memory limit is the physical limit scaled by the configured ratio.
long vmemBytes = (long) (pmemRatio * pmemBytes);
int cpuVcores = getResource().getVirtualCores();
// Time elapsed since containerLaunchStartTime, also recorded in the metrics.
long launchDuration = clock.getTime() - containerLaunchStartTime;
metrics.addContainerLaunchDuration(launchDuration);
dispatcher.getEventHandler().handle(
new ContainerStartMonitoringEvent(containerId,
vmemBytes, pmemBytes, cpuVcores));
long pmemBytes = getResource().getMemory() * 1024 * 1024L;
float pmemRatio = daemonConf.getFloat(
YarnConfiguration.NM_VMEM_PMEM_RATIO,
YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
long vmemBytes = (long) (pmemRatio * pmemBytes);
int cpuVcores = getResource().getVirtualCores();
// Localization is measured from containerLocalizationStartTime up to
// containerLaunchStartTime.
long localizationDuration = containerLaunchStartTime -
containerLocalizationStartTime;
// Both durations are passed along on the monitoring event.
dispatcher.getEventHandler().handle(
new ContainerStartMonitoringEvent(containerId,
vmemBytes, pmemBytes, cpuVcores, launchDuration,
localizationDuration));
}
private void addDiagnostics(String... diags) {
@ -601,6 +607,7 @@ public class ContainerImpl implements Container {
}
}
container.containerLocalizationStartTime = clock.getTime();
// Send requests for public, private resources
Map<String,LocalResource> cntrRsrc = ctxt.getLocalResources();
if (!cntrRsrc.isEmpty()) {
@ -756,8 +763,6 @@ public class ContainerImpl implements Container {
container.sendContainerMonitorStartEvent();
container.metrics.runningContainer();
container.wasLaunched = true;
long duration = clock.getTime() - container.containerLaunchStartTime;
container.metrics.addContainerLaunchDuration(duration);
if (container.recoveredAsKilled) {
LOG.info("Killing " + container.containerId

View File

@ -28,6 +28,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MetricsRegistry;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
import org.apache.hadoop.metrics2.lib.MutableStat;
import org.apache.hadoop.yarn.api.records.ContainerId;
@ -46,6 +47,9 @@ public class ContainerMetrics implements MetricsSource {
public static final String VMEM_LIMIT_METRIC_NAME = "vMemLimitMBs";
public static final String VCORE_LIMIT_METRIC_NAME = "vCoreLimit";
public static final String PMEM_USAGE_METRIC_NAME = "pMemUsageMBs";
public static final String LAUNCH_DURATION_METRIC_NAME = "launchDurationMs";
public static final String LOCALIZATION_DURATION_METRIC_NAME =
"localizationDurationMs";
private static final String PHY_CPU_USAGE_METRIC_NAME = "pCpuUsagePercent";
// Use a multiplier of 1000 to avoid losing too much precision when
@ -74,6 +78,12 @@ public class ContainerMetrics implements MetricsSource {
@Metric
public MutableGaugeInt cpuVcoreLimit;
@Metric
public MutableGaugeLong launchDurationMs;
@Metric
public MutableGaugeLong localizationDurationMs;
static final MetricsInfo RECORD_INFO =
info("ContainerResource", "Resource limit and usage by container");
@ -122,6 +132,10 @@ public class ContainerMetrics implements MetricsSource {
VMEM_LIMIT_METRIC_NAME, "Virtual memory limit in MBs", 0);
this.cpuVcoreLimit = registry.newGauge(
VCORE_LIMIT_METRIC_NAME, "CPU limit in number of vcores", 0);
this.launchDurationMs = registry.newGauge(
LAUNCH_DURATION_METRIC_NAME, "Launch duration in MS", 0L);
this.localizationDurationMs = registry.newGauge(
LOCALIZATION_DURATION_METRIC_NAME, "Localization duration in MS", 0L);
}
ContainerMetrics tag(MetricsInfo info, ContainerId containerId) {
@ -213,6 +227,12 @@ public class ContainerMetrics implements MetricsSource {
this.cpuVcoreLimit.set(cpuVcores);
}
/**
 * Stores the container's launch and localization durations in their
 * respective gauges.
 *
 * @param launchDuration value written to the launch-duration gauge
 * @param localizationDuration value written to the localization-duration gauge
 */
public void recordStateChangeDurations(
    long launchDuration, long localizationDuration) {
  launchDurationMs.set(launchDuration);
  localizationDurationMs.set(localizationDuration);
}
private synchronized void scheduleTimerTaskIfRequired() {
if (flushPeriodMs > 0) {
// Lazily initialize timer

View File

@ -25,13 +25,18 @@ public class ContainerStartMonitoringEvent extends ContainersMonitorEvent {
private final long vmemLimit;
private final long pmemLimit;
private final int cpuVcores;
private final long launchDuration;
private final long localizationDuration;
// Creates a START_MONITORING_CONTAINER event for the given container and
// stores the supplied limits and durations in the event's final fields.
// NOTE(review): two parameter-list lines follow (a four-argument form and a
// six-argument form). This looks like the old and new signatures shown
// together by a diff view without +/- markers -- confirm against the real file.
public ContainerStartMonitoringEvent(ContainerId containerId,
long vmemLimit, long pmemLimit, int cpuVcores) {
long vmemLimit, long pmemLimit, int cpuVcores, long launchDuration,
long localizationDuration) {
super(containerId, ContainersMonitorEventType.START_MONITORING_CONTAINER);
this.vmemLimit = vmemLimit;
this.pmemLimit = pmemLimit;
this.cpuVcores = cpuVcores;
// Timing values carried alongside the resource limits.
this.launchDuration = launchDuration;
this.localizationDuration = localizationDuration;
}
public long getVmemLimit() {
@ -45,4 +50,12 @@ public class ContainerStartMonitoringEvent extends ContainersMonitorEvent {
/**
 * @return the vcore count this event was constructed with
 */
public int getCpuVcores() {
  return cpuVcores;
}
/**
 * @return the launch duration this event was constructed with
 */
public long getLaunchDuration() {
  return launchDuration;
}
/**
 * @return the localization duration this event was constructed with
 */
public long getLocalizationDuration() {
  return localizationDuration;
}
}

View File

@ -617,6 +617,15 @@ public class ContainersMonitorImpl extends AbstractService implements
case START_MONITORING_CONTAINER:
ContainerStartMonitoringEvent startEvent =
(ContainerStartMonitoringEvent) monitoringEvent;
if (containerMetricsEnabled) {
ContainerMetrics usageMetrics = ContainerMetrics
.forContainer(containerId, containerMetricsPeriodMs);
usageMetrics.recordStateChangeDurations(
startEvent.getLaunchDuration(),
startEvent.getLocalizationDuration());
}
synchronized (this.containersToBeAdded) {
ProcessTreeInfo processTreeInfo =
new ProcessTreeInfo(containerId, null, null,

View File

@ -87,10 +87,14 @@ public class TestContainerMetrics {
int anyPmemLimit = 1024;
int anyVmemLimit = 2048;
int anyVcores = 10;
long anyLaunchDuration = 20L;
long anyLocalizationDuration = 1000L;
String anyProcessId = "1234";
metrics.recordResourceLimit(anyVmemLimit, anyPmemLimit, anyVcores);
metrics.recordProcessId(anyProcessId);
metrics.recordStateChangeDurations(anyLaunchDuration,
anyLocalizationDuration);
Thread.sleep(110);
metrics.getMetrics(collector, true);
@ -105,6 +109,12 @@ public class TestContainerMetrics {
MetricsRecords.assertMetric(record, ContainerMetrics.VMEM_LIMIT_METRIC_NAME, anyVmemLimit);
MetricsRecords.assertMetric(record, ContainerMetrics.VCORE_LIMIT_METRIC_NAME, anyVcores);
MetricsRecords.assertMetric(record,
ContainerMetrics.LAUNCH_DURATION_METRIC_NAME, anyLaunchDuration);
MetricsRecords.assertMetric(record,
ContainerMetrics.LOCALIZATION_DURATION_METRIC_NAME,
anyLocalizationDuration);
collector.clear();
}
}