YARN-2679. Add metric for container launch duration. (Zhihai Xu via kasha)
(cherry picked from commit 233b61e495)
This commit is contained in:
parent
b395f58e85
commit
af0b54a4ee
|
@ -60,6 +60,9 @@ Release 2.7.0 - UNRELEASED
|
||||||
YARN-2604. Scheduler should consider max-allocation-* in conjunction
|
YARN-2604. Scheduler should consider max-allocation-* in conjunction
|
||||||
with the largest node. (Robert Kanter via kasha)
|
with the largest node. (Robert Kanter via kasha)
|
||||||
|
|
||||||
|
YARN-2679. Add metric for container launch duration. (Zhihai Xu via kasha)
|
||||||
|
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -74,7 +74,9 @@ import org.apache.hadoop.yarn.state.MultipleArcTransition;
|
||||||
import org.apache.hadoop.yarn.state.SingleArcTransition;
|
import org.apache.hadoop.yarn.state.SingleArcTransition;
|
||||||
import org.apache.hadoop.yarn.state.StateMachine;
|
import org.apache.hadoop.yarn.state.StateMachine;
|
||||||
import org.apache.hadoop.yarn.state.StateMachineFactory;
|
import org.apache.hadoop.yarn.state.StateMachineFactory;
|
||||||
|
import org.apache.hadoop.yarn.util.Clock;
|
||||||
import org.apache.hadoop.yarn.util.ConverterUtils;
|
import org.apache.hadoop.yarn.util.ConverterUtils;
|
||||||
|
import org.apache.hadoop.yarn.util.SystemClock;
|
||||||
|
|
||||||
public class ContainerImpl implements Container {
|
public class ContainerImpl implements Container {
|
||||||
|
|
||||||
|
@ -92,6 +94,8 @@ public class ContainerImpl implements Container {
|
||||||
private int exitCode = ContainerExitStatus.INVALID;
|
private int exitCode = ContainerExitStatus.INVALID;
|
||||||
private final StringBuilder diagnostics;
|
private final StringBuilder diagnostics;
|
||||||
private boolean wasLaunched;
|
private boolean wasLaunched;
|
||||||
|
private long containerLaunchStartTime;
|
||||||
|
private static Clock clock = new SystemClock();
|
||||||
|
|
||||||
/** The NM-wide configuration - not specific to this container */
|
/** The NM-wide configuration - not specific to this container */
|
||||||
private final Configuration daemonConf;
|
private final Configuration daemonConf;
|
||||||
|
@ -521,6 +525,7 @@ public class ContainerImpl implements Container {
|
||||||
// try to recover a container that was previously launched
|
// try to recover a container that was previously launched
|
||||||
launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER;
|
launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER;
|
||||||
}
|
}
|
||||||
|
containerLaunchStartTime = clock.getTime();
|
||||||
dispatcher.getEventHandler().handle(
|
dispatcher.getEventHandler().handle(
|
||||||
new ContainersLauncherEvent(this, launcherEvent));
|
new ContainersLauncherEvent(this, launcherEvent));
|
||||||
}
|
}
|
||||||
|
@ -781,6 +786,8 @@ public class ContainerImpl implements Container {
|
||||||
container.sendContainerMonitorStartEvent();
|
container.sendContainerMonitorStartEvent();
|
||||||
container.metrics.runningContainer();
|
container.metrics.runningContainer();
|
||||||
container.wasLaunched = true;
|
container.wasLaunched = true;
|
||||||
|
long duration = clock.getTime() - container.containerLaunchStartTime;
|
||||||
|
container.metrics.addContainerLaunchDuration(duration);
|
||||||
|
|
||||||
if (container.recoveredAsKilled) {
|
if (container.recoveredAsKilled) {
|
||||||
LOG.info("Killing " + container.containerId
|
LOG.info("Killing " + container.containerId
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableCounterInt;
|
import org.apache.hadoop.metrics2.lib.MutableCounterInt;
|
||||||
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
import org.apache.hadoop.metrics2.lib.MutableGaugeInt;
|
||||||
|
import org.apache.hadoop.metrics2.lib.MutableRate;
|
||||||
import org.apache.hadoop.metrics2.source.JvmMetrics;
|
import org.apache.hadoop.metrics2.source.JvmMetrics;
|
||||||
import org.apache.hadoop.yarn.api.records.Resource;
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
|
||||||
|
@ -43,6 +44,8 @@ public class NodeManagerMetrics {
|
||||||
@Metric("Current allocated Virtual Cores")
|
@Metric("Current allocated Virtual Cores")
|
||||||
MutableGaugeInt allocatedVCores;
|
MutableGaugeInt allocatedVCores;
|
||||||
@Metric MutableGaugeInt availableVCores;
|
@Metric MutableGaugeInt availableVCores;
|
||||||
|
@Metric("Container launch duration")
|
||||||
|
MutableRate containerLaunchDuration;
|
||||||
|
|
||||||
public static NodeManagerMetrics create() {
|
public static NodeManagerMetrics create() {
|
||||||
return create(DefaultMetricsSystem.instance());
|
return create(DefaultMetricsSystem.instance());
|
||||||
|
@ -107,7 +110,11 @@ public class NodeManagerMetrics {
|
||||||
availableGB.incr(res.getMemory() / 1024);
|
availableGB.incr(res.getMemory() / 1024);
|
||||||
availableVCores.incr(res.getVirtualCores());
|
availableVCores.incr(res.getVirtualCores());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addContainerLaunchDuration(long value) {
|
||||||
|
containerLaunchDuration.add(value);
|
||||||
|
}
|
||||||
|
|
||||||
public int getRunningContainers() {
|
public int getRunningContainers() {
|
||||||
return containersRunning.value();
|
return containersRunning.value();
|
||||||
}
|
}
|
||||||
|
|
|
@ -59,6 +59,7 @@ public class TestNodeManagerMetrics {
|
||||||
|
|
||||||
metrics.initingContainer();
|
metrics.initingContainer();
|
||||||
metrics.runningContainer();
|
metrics.runningContainer();
|
||||||
|
metrics.addContainerLaunchDuration(1);
|
||||||
|
|
||||||
checkMetrics(5, 1, 1, 1, 1, 1, 2, 2, 6, 4, 12);
|
checkMetrics(5, 1, 1, 1, 1, 1, 2, 2, 6, 4, 12);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue