YARN-9428. Add metrics for paused containers in NodeManager. Contributed by Abhishek Modi.

This commit is contained in:
Giovanni Matteo Fumarola 2019-04-01 14:21:17 -07:00
parent da7f8c244d
commit ab2bda57bd
3 changed files with 31 additions and 0 deletions

View File

@ -161,6 +161,7 @@ public class ContainerImpl implements Container {
private final StringBuilder diagnostics; private final StringBuilder diagnostics;
private final int diagnosticsMaxSize; private final int diagnosticsMaxSize;
private boolean wasLaunched; private boolean wasLaunched;
private boolean wasPaused;
private long containerLocalizationStartTime; private long containerLocalizationStartTime;
private long containerLaunchStartTime; private long containerLaunchStartTime;
private ContainerMetrics containerMetrics; private ContainerMetrics containerMetrics;
@ -1541,6 +1542,7 @@ public class ContainerImpl implements Container {
public void transition(ContainerImpl container, ContainerEvent event) { public void transition(ContainerImpl container, ContainerEvent event) {
// Start monitoring the container and remember that it has been launched.
container.sendContainerMonitorStartEvent(); container.sendContainerMonitorStartEvent();
container.wasLaunched = true; container.wasLaunched = true;
// Flag the container as paused so that a later setIsPaused(false) is seen
// as leaving the paused state.
// NOTE(review): unlike the other setIsPaused(true) call site in this class,
// no metrics.pausedContainer() accompanies this call, yet setIsPaused(false)
// calls metrics.endPausedContainer() whenever the flag was true. The paused
// gauge may therefore be decremented without a matching increment on this
// path -- TODO confirm whether an increment is intended here.
container.setIsPaused(true);
} }
} }
@ -1561,6 +1563,7 @@ public class ContainerImpl implements Container {
public void transition(ContainerImpl container, ContainerEvent event) { public void transition(ContainerImpl container, ContainerEvent event) {
container.setIsReInitializing(false); container.setIsReInitializing(false);
container.setIsPaused(false);
// Set exit code to 0 on success // Set exit code to 0 on success
container.exitCode = 0; container.exitCode = 0;
@ -1591,6 +1594,7 @@ public class ContainerImpl implements Container {
@Override @Override
public void transition(ContainerImpl container, ContainerEvent event) { public void transition(ContainerImpl container, ContainerEvent event) {
container.setIsPaused(false);
container.setIsReInitializing(false); container.setIsReInitializing(false);
ContainerExitEvent exitEvent = (ContainerExitEvent) event; ContainerExitEvent exitEvent = (ContainerExitEvent) event;
container.exitCode = exitEvent.getExitCode(); container.exitCode = exitEvent.getExitCode();
@ -1835,6 +1839,7 @@ public class ContainerImpl implements Container {
public void transition(ContainerImpl container, ContainerEvent event) { public void transition(ContainerImpl container, ContainerEvent event) {
// Kill the process/process-grp // Kill the process/process-grp
container.setIsReInitializing(false); container.setIsReInitializing(false);
container.setIsPaused(false);
container.dispatcher.getEventHandler().handle( container.dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(container, new ContainersLauncherEvent(container,
ContainersLauncherEventType.CLEANUP_CONTAINER)); ContainersLauncherEventType.CLEANUP_CONTAINER));
@ -2080,6 +2085,8 @@ public class ContainerImpl implements Container {
SingleArcTransition<ContainerImpl, ContainerEvent> { SingleArcTransition<ContainerImpl, ContainerEvent> {
@Override @Override
public void transition(ContainerImpl container, ContainerEvent event) { public void transition(ContainerImpl container, ContainerEvent event) {
container.setIsPaused(true);
container.metrics.pausedContainer();
// Container was PAUSED so tell the scheduler // Container was PAUSED so tell the scheduler
container.dispatcher.getEventHandler().handle( container.dispatcher.getEventHandler().handle(
new ContainerSchedulerEvent(container, new ContainerSchedulerEvent(container,
@ -2096,6 +2103,7 @@ public class ContainerImpl implements Container {
SingleArcTransition<ContainerImpl, ContainerEvent> { SingleArcTransition<ContainerImpl, ContainerEvent> {
@Override @Override
public void transition(ContainerImpl container, ContainerEvent event) { public void transition(ContainerImpl container, ContainerEvent event) {
container.setIsPaused(false);
// Pause the process/process-grp if it is supported by the container // Pause the process/process-grp if it is supported by the container
container.dispatcher.getEventHandler().handle( container.dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(container, new ContainersLauncherEvent(container,
@ -2154,6 +2162,13 @@ public class ContainerImpl implements Container {
return container.resourceSet.getResourcesUploadPolicies().get(resource); return container.resourceSet.getResourcesUploadPolicies().get(resource);
} }
/**
 * Updates the flag that tracks whether this container is paused.
 *
 * <p>If the flag goes from paused to not paused, the paused-container
 * metric is released through {@code metrics.endPausedContainer()}. This
 * method never increments that metric; callers that move a container into
 * the paused state are responsible for calling
 * {@code metrics.pausedContainer()} themselves.
 *
 * @param paused {@code true} if the container is now paused,
 *               {@code false} otherwise
 */
private void setIsPaused(boolean paused) {
  // Only a paused -> not-paused flip releases the metric; repeated
  // setIsPaused(false) calls on a non-paused container are no-ops.
  final boolean leavingPausedState = !paused && this.wasPaused;
  if (leavingPausedState) {
    this.metrics.endPausedContainer();
  }
  this.wasPaused = paused;
}
@VisibleForTesting @VisibleForTesting
ContainerRetryContext getContainerRetryContext() { ContainerRetryContext getContainerRetryContext() {
return containerRetryContext; return containerRetryContext;

View File

@ -44,6 +44,7 @@ public class NodeManagerMetrics {
@Metric("# of initializing containers") @Metric("# of initializing containers")
MutableGaugeInt containersIniting; MutableGaugeInt containersIniting;
@Metric MutableGaugeInt containersRunning; @Metric MutableGaugeInt containersRunning;
@Metric("# of paused containers") MutableGaugeInt containersPaused;
@Metric("Current allocated memory in GB") @Metric("Current allocated memory in GB")
MutableGaugeInt allocatedGB; MutableGaugeInt allocatedGB;
@Metric("Current # of allocated containers") @Metric("Current # of allocated containers")
@ -168,6 +169,14 @@ public class NodeManagerMetrics {
containersReIniting.decr(); containersReIniting.decr();
} }
/**
 * Records that one more container is paused by incrementing the
 * {@code containersPaused} gauge.
 */
public void pausedContainer() {
  this.containersPaused.incr();
}
/**
 * Records that one container is no longer paused by decrementing the
 * {@code containersPaused} gauge.
 */
public void endPausedContainer() {
  this.containersPaused.decr();
}
public void allocateContainer(Resource res) { public void allocateContainer(Resource res) {
allocatedContainers.incr(); allocatedContainers.incr();
allocatedMB = allocatedMB + res.getMemorySize(); allocatedMB = allocatedMB + res.getMemorySize();
@ -268,6 +277,10 @@ public class NodeManagerMetrics {
return containersRunning.value(); return containersRunning.value();
} }
/**
 * Returns the current value of the {@code containersPaused} gauge.
 *
 * @return the number of containers currently counted as paused
 */
public int getPausedContainers() {
  final int pausedNow = this.containersPaused.value();
  return pausedNow;
}
@VisibleForTesting @VisibleForTesting
public int getKilledContainers() { public int getKilledContainers() {
return containersKilled.value(); return containersKilled.value();

View File

@ -246,13 +246,16 @@ public class TestContainer {
wc.initContainer(); wc.initContainer();
wc.localizeResources(); wc.localizeResources();
int running = metrics.getRunningContainers(); int running = metrics.getRunningContainers();
int paused = metrics.getPausedContainers();
wc.launchContainer(); wc.launchContainer();
assertEquals(running + 1, metrics.getRunningContainers()); assertEquals(running + 1, metrics.getRunningContainers());
reset(wc.localizerBus); reset(wc.localizerBus);
wc.pauseContainer(); wc.pauseContainer();
assertEquals(ContainerState.PAUSED, assertEquals(ContainerState.PAUSED,
wc.c.getContainerState()); wc.c.getContainerState());
assertEquals(paused + 1, metrics.getPausedContainers());
wc.resumeContainer(); wc.resumeContainer();
assertEquals(paused, metrics.getPausedContainers());
assertEquals(ContainerState.RUNNING, assertEquals(ContainerState.RUNNING,
wc.c.getContainerState()); wc.c.getContainerState());
wc.containerKilledOnRequest(); wc.containerKilledOnRequest();