YARN-9428. Add metrics for paused containers in NodeManager. Contributed by Abhishek Modi.
This commit is contained in:
parent
da7f8c244d
commit
ab2bda57bd
|
@ -161,6 +161,7 @@ public class ContainerImpl implements Container {
|
||||||
private final StringBuilder diagnostics;
|
private final StringBuilder diagnostics;
|
||||||
private final int diagnosticsMaxSize;
|
private final int diagnosticsMaxSize;
|
||||||
private boolean wasLaunched;
|
private boolean wasLaunched;
|
||||||
|
private boolean wasPaused;
|
||||||
private long containerLocalizationStartTime;
|
private long containerLocalizationStartTime;
|
||||||
private long containerLaunchStartTime;
|
private long containerLaunchStartTime;
|
||||||
private ContainerMetrics containerMetrics;
|
private ContainerMetrics containerMetrics;
|
||||||
|
@ -1541,6 +1542,7 @@ public class ContainerImpl implements Container {
|
||||||
public void transition(ContainerImpl container, ContainerEvent event) {
|
public void transition(ContainerImpl container, ContainerEvent event) {
|
||||||
container.sendContainerMonitorStartEvent();
|
container.sendContainerMonitorStartEvent();
|
||||||
container.wasLaunched = true;
|
container.wasLaunched = true;
|
||||||
|
// Flags the container as paused without calling metrics.pausedContainer().
// NOTE(review): a later setIsPaused(false) calls metrics.endPausedContainer()
// whenever this flag is set, so confirm the paused gauge is incremented
// somewhere for this path; otherwise it can be decremented without a
// matching increment.
container.setIsPaused(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1561,6 +1563,7 @@ public class ContainerImpl implements Container {
|
||||||
public void transition(ContainerImpl container, ContainerEvent event) {
|
public void transition(ContainerImpl container, ContainerEvent event) {
|
||||||
|
|
||||||
container.setIsReInitializing(false);
|
container.setIsReInitializing(false);
|
||||||
|
container.setIsPaused(false);
|
||||||
// Set exit code to 0 on success
|
// Set exit code to 0 on success
|
||||||
container.exitCode = 0;
|
container.exitCode = 0;
|
||||||
|
|
||||||
|
@ -1591,6 +1594,7 @@ public class ContainerImpl implements Container {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void transition(ContainerImpl container, ContainerEvent event) {
|
public void transition(ContainerImpl container, ContainerEvent event) {
|
||||||
|
container.setIsPaused(false);
|
||||||
container.setIsReInitializing(false);
|
container.setIsReInitializing(false);
|
||||||
ContainerExitEvent exitEvent = (ContainerExitEvent) event;
|
ContainerExitEvent exitEvent = (ContainerExitEvent) event;
|
||||||
container.exitCode = exitEvent.getExitCode();
|
container.exitCode = exitEvent.getExitCode();
|
||||||
|
@ -1835,6 +1839,7 @@ public class ContainerImpl implements Container {
|
||||||
public void transition(ContainerImpl container, ContainerEvent event) {
|
public void transition(ContainerImpl container, ContainerEvent event) {
|
||||||
// Kill the process/process-grp
|
// Kill the process/process-grp
|
||||||
container.setIsReInitializing(false);
|
container.setIsReInitializing(false);
|
||||||
|
container.setIsPaused(false);
|
||||||
container.dispatcher.getEventHandler().handle(
|
container.dispatcher.getEventHandler().handle(
|
||||||
new ContainersLauncherEvent(container,
|
new ContainersLauncherEvent(container,
|
||||||
ContainersLauncherEventType.CLEANUP_CONTAINER));
|
ContainersLauncherEventType.CLEANUP_CONTAINER));
|
||||||
|
@ -2080,6 +2085,8 @@ public class ContainerImpl implements Container {
|
||||||
SingleArcTransition<ContainerImpl, ContainerEvent> {
|
SingleArcTransition<ContainerImpl, ContainerEvent> {
|
||||||
@Override
|
@Override
|
||||||
public void transition(ContainerImpl container, ContainerEvent event) {
|
public void transition(ContainerImpl container, ContainerEvent event) {
|
||||||
|
container.setIsPaused(true);
|
||||||
|
container.metrics.pausedContainer();
|
||||||
// Container was PAUSED so tell the scheduler
|
// Container was PAUSED so tell the scheduler
|
||||||
container.dispatcher.getEventHandler().handle(
|
container.dispatcher.getEventHandler().handle(
|
||||||
new ContainerSchedulerEvent(container,
|
new ContainerSchedulerEvent(container,
|
||||||
|
@ -2096,6 +2103,7 @@ public class ContainerImpl implements Container {
|
||||||
SingleArcTransition<ContainerImpl, ContainerEvent> {
|
SingleArcTransition<ContainerImpl, ContainerEvent> {
|
||||||
@Override
|
@Override
|
||||||
public void transition(ContainerImpl container, ContainerEvent event) {
|
public void transition(ContainerImpl container, ContainerEvent event) {
|
||||||
|
container.setIsPaused(false);
|
||||||
// Pause the process/process-grp if it is supported by the container
|
// Pause the process/process-grp if it is supported by the container
|
||||||
container.dispatcher.getEventHandler().handle(
|
container.dispatcher.getEventHandler().handle(
|
||||||
new ContainersLauncherEvent(container,
|
new ContainersLauncherEvent(container,
|
||||||
|
@ -2154,6 +2162,13 @@ public class ContainerImpl implements Container {
|
||||||
return container.resourceSet.getResourcesUploadPolicies().get(resource);
|
return container.resourceSet.getResourcesUploadPolicies().get(resource);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Records whether this container is currently considered paused.
 *
 * When the flag goes from {@code true} to {@code false}, this calls
 * {@code metrics.endPausedContainer()}. Passing {@code true} only updates
 * the flag; this method never calls {@code metrics.pausedContainer()}.
 * NOTE(review): callers that pass {@code true} appear to be responsible
 * for the matching increment - confirm every such caller does so.
 *
 * @param paused the new paused state to remember
 */
private void setIsPaused(boolean paused) {
|
||||||
|
// Only a paused -> not-paused change touches the metrics.
if (this.wasPaused && !paused) {
|
||||||
|
this.metrics.endPausedContainer();
|
||||||
|
}
|
||||||
|
this.wasPaused = paused;
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
ContainerRetryContext getContainerRetryContext() {
|
ContainerRetryContext getContainerRetryContext() {
|
||||||
return containerRetryContext;
|
return containerRetryContext;
|
||||||
|
|
|
@ -44,6 +44,7 @@ public class NodeManagerMetrics {
|
||||||
@Metric("# of initializing containers")
|
@Metric("# of initializing containers")
|
||||||
MutableGaugeInt containersIniting;
|
MutableGaugeInt containersIniting;
|
||||||
@Metric MutableGaugeInt containersRunning;
|
@Metric MutableGaugeInt containersRunning;
|
||||||
|
@Metric("# of paused containers") MutableGaugeInt containersPaused;
|
||||||
@Metric("Current allocated memory in GB")
|
@Metric("Current allocated memory in GB")
|
||||||
MutableGaugeInt allocatedGB;
|
MutableGaugeInt allocatedGB;
|
||||||
@Metric("Current # of allocated containers")
|
@Metric("Current # of allocated containers")
|
||||||
|
@ -168,6 +169,14 @@ public class NodeManagerMetrics {
|
||||||
containersReIniting.decr();
|
containersReIniting.decr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Increments the {@code containersPaused} gauge by one.
 */
public void pausedContainer() {
|
||||||
|
containersPaused.incr();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Decrements the {@code containersPaused} gauge by one.
 * No lower bound is enforced here.
 */
public void endPausedContainer() {
|
||||||
|
containersPaused.decr();
|
||||||
|
}
|
||||||
|
|
||||||
public void allocateContainer(Resource res) {
|
public void allocateContainer(Resource res) {
|
||||||
allocatedContainers.incr();
|
allocatedContainers.incr();
|
||||||
allocatedMB = allocatedMB + res.getMemorySize();
|
allocatedMB = allocatedMB + res.getMemorySize();
|
||||||
|
@ -268,6 +277,10 @@ public class NodeManagerMetrics {
|
||||||
return containersRunning.value();
|
return containersRunning.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the current value of the {@code containersPaused} gauge.
 *
 * @return the gauge's current value
 */
public int getPausedContainers() {
|
||||||
|
return containersPaused.value();
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public int getKilledContainers() {
|
public int getKilledContainers() {
|
||||||
return containersKilled.value();
|
return containersKilled.value();
|
||||||
|
|
|
@ -246,13 +246,16 @@ public class TestContainer {
|
||||||
wc.initContainer();
|
wc.initContainer();
|
||||||
wc.localizeResources();
|
wc.localizeResources();
|
||||||
int running = metrics.getRunningContainers();
|
int running = metrics.getRunningContainers();
|
||||||
|
int paused = metrics.getPausedContainers();
|
||||||
wc.launchContainer();
|
wc.launchContainer();
|
||||||
assertEquals(running + 1, metrics.getRunningContainers());
|
assertEquals(running + 1, metrics.getRunningContainers());
|
||||||
reset(wc.localizerBus);
|
reset(wc.localizerBus);
|
||||||
wc.pauseContainer();
|
wc.pauseContainer();
|
||||||
assertEquals(ContainerState.PAUSED,
|
assertEquals(ContainerState.PAUSED,
|
||||||
wc.c.getContainerState());
|
wc.c.getContainerState());
|
||||||
|
assertEquals(paused + 1, metrics.getPausedContainers());
|
||||||
wc.resumeContainer();
|
wc.resumeContainer();
|
||||||
|
assertEquals(paused, metrics.getPausedContainers());
|
||||||
assertEquals(ContainerState.RUNNING,
|
assertEquals(ContainerState.RUNNING,
|
||||||
wc.c.getContainerState());
|
wc.c.getContainerState());
|
||||||
wc.containerKilledOnRequest();
|
wc.containerKilledOnRequest();
|
||||||
|
|
Loading…
Reference in New Issue