YARN-2679. Add metric for container launch duration. (Zhihai Xu via kasha)

(cherry picked from commit 233b61e495)
This commit is contained in:
Karthik Kambatla 2014-11-21 14:09:48 -08:00
parent b395f58e85
commit af0b54a4ee
4 changed files with 19 additions and 1 deletion

View File

@ -60,6 +60,9 @@ Release 2.7.0 - UNRELEASED
YARN-2604. Scheduler should consider max-allocation-* in conjunction YARN-2604. Scheduler should consider max-allocation-* in conjunction
with the largest node. (Robert Kanter via kasha) with the largest node. (Robert Kanter via kasha)
YARN-2679. Add metric for container launch duration. (Zhihai Xu via kasha)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -74,7 +74,9 @@ import org.apache.hadoop.yarn.state.MultipleArcTransition;
import org.apache.hadoop.yarn.state.SingleArcTransition; import org.apache.hadoop.yarn.state.SingleArcTransition;
import org.apache.hadoop.yarn.state.StateMachine; import org.apache.hadoop.yarn.state.StateMachine;
import org.apache.hadoop.yarn.state.StateMachineFactory; import org.apache.hadoop.yarn.state.StateMachineFactory;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.SystemClock;
public class ContainerImpl implements Container { public class ContainerImpl implements Container {
@ -92,6 +94,8 @@ public class ContainerImpl implements Container {
private int exitCode = ContainerExitStatus.INVALID; private int exitCode = ContainerExitStatus.INVALID;
private final StringBuilder diagnostics; private final StringBuilder diagnostics;
private boolean wasLaunched; private boolean wasLaunched;
private long containerLaunchStartTime;
private static Clock clock = new SystemClock();
/** The NM-wide configuration - not specific to this container */ /** The NM-wide configuration - not specific to this container */
private final Configuration daemonConf; private final Configuration daemonConf;
@ -521,6 +525,7 @@ public class ContainerImpl implements Container {
// try to recover a container that was previously launched // try to recover a container that was previously launched
launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER; launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER;
} }
containerLaunchStartTime = clock.getTime();
dispatcher.getEventHandler().handle( dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(this, launcherEvent)); new ContainersLauncherEvent(this, launcherEvent));
} }
@ -781,6 +786,8 @@ public class ContainerImpl implements Container {
container.sendContainerMonitorStartEvent(); container.sendContainerMonitorStartEvent();
container.metrics.runningContainer(); container.metrics.runningContainer();
container.wasLaunched = true; container.wasLaunched = true;
long duration = clock.getTime() - container.containerLaunchStartTime;
container.metrics.addContainerLaunchDuration(duration);
if (container.recoveredAsKilled) { if (container.recoveredAsKilled) {
LOG.info("Killing " + container.containerId LOG.info("Killing " + container.containerId

View File

@ -23,6 +23,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.MutableCounterInt; import org.apache.hadoop.metrics2.lib.MutableCounterInt;
import org.apache.hadoop.metrics2.lib.MutableGaugeInt; import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
import org.apache.hadoop.metrics2.lib.MutableRate;
import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.metrics2.source.JvmMetrics;
import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Resource;
@ -43,6 +44,8 @@ public class NodeManagerMetrics {
@Metric("Current allocated Virtual Cores") @Metric("Current allocated Virtual Cores")
MutableGaugeInt allocatedVCores; MutableGaugeInt allocatedVCores;
@Metric MutableGaugeInt availableVCores; @Metric MutableGaugeInt availableVCores;
@Metric("Container launch duration")
MutableRate containerLaunchDuration;
public static NodeManagerMetrics create() { public static NodeManagerMetrics create() {
return create(DefaultMetricsSystem.instance()); return create(DefaultMetricsSystem.instance());
@ -107,7 +110,11 @@ public class NodeManagerMetrics {
availableGB.incr(res.getMemory() / 1024); availableGB.incr(res.getMemory() / 1024);
availableVCores.incr(res.getVirtualCores()); availableVCores.incr(res.getVirtualCores());
} }
/**
 * Records one container launch duration sample in the
 * {@code containerLaunchDuration} rate metric.
 *
 * @param value the launch duration sample to add; the caller in
 *              ContainerImpl passes the difference of two
 *              {@code clock.getTime()} readings (presumably milliseconds --
 *              TODO confirm against the Clock implementation)
 */
public void addContainerLaunchDuration(long value) {
containerLaunchDuration.add(value);
}
public int getRunningContainers() { public int getRunningContainers() {
return containersRunning.value(); return containersRunning.value();
} }

View File

@ -59,6 +59,7 @@ public class TestNodeManagerMetrics {
metrics.initingContainer(); metrics.initingContainer();
metrics.runningContainer(); metrics.runningContainer();
metrics.addContainerLaunchDuration(1);
checkMetrics(5, 1, 1, 1, 1, 1, 2, 2, 6, 4, 12); checkMetrics(5, 1, 1, 1, 1, 1, 2, 2, 6, 4, 12);
} }