From 9024ad4aa0c2df70d463ac4fd0265a06ed6612f8 Mon Sep 17 00:00:00 2001 From: Sanford Ryza Date: Tue, 11 Feb 2014 20:14:30 +0000 Subject: [PATCH] YARN-1697. NodeManager reports negative running containers (Sandy Ryza) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1567356 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../containermanager/container/ContainerImpl.java | 10 ++++++++-- .../server/nodemanager/metrics/NodeManagerMetrics.java | 4 ++++ .../containermanager/container/TestContainer.java | 3 +++ 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6660a49ca25..14566ab4ba9 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -246,6 +246,8 @@ Release 2.4.0 - UNRELEASED YARN-1698. Fixed default TimelineStore in code to match what is documented in yarn-default.xml (Zhijie Shen via vinodkv) + YARN-1697. NodeManager reports negative running containers (Sandy Ryza) + Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 486f3ce00c5..862e3fa9bcd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -83,6 +83,7 @@ public class ContainerImpl implements Container { private final String user; private 
int exitCode = ContainerExitStatus.INVALID; private final StringBuilder diagnostics; + private boolean wasLaunched; /** The NM-wide configuration - not specific to this container */ private final Configuration daemonConf; @@ -418,7 +419,9 @@ public class ContainerImpl implements Container { applicationId, containerId); break; case EXITED_WITH_FAILURE: - metrics.endRunningContainer(); + if (wasLaunched) { + metrics.endRunningContainer(); + } // fall through case LOCALIZATION_FAILED: metrics.failedContainer(); @@ -428,7 +431,9 @@ public class ContainerImpl implements Container { applicationId, containerId); break; case CONTAINER_CLEANEDUP_AFTER_KILL: - metrics.endRunningContainer(); + if (wasLaunched) { + metrics.endRunningContainer(); + } // fall through case NEW: metrics.killedContainer(); @@ -636,6 +641,7 @@ public class ContainerImpl implements Container { new ContainerStartMonitoringEvent(container.containerId, vmemBytes, pmemBytes)); container.metrics.runningContainer(); + container.wasLaunched = true; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index 4d62247539a..1feb8c70e77 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -99,4 +99,8 @@ public class NodeManagerMetrics { public void addResource(Resource res) { availableGB.incr(res.getMemory() / 1024); } + + public int getRunningContainers() { + return containersRunning.value(); + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index addb28d83a2..3199fdfeeb6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -348,6 +348,9 @@ public class TestContainer { wc.c.getContainerState()); assertNull(wc.c.getLocalizedResources()); verifyCleanupCall(wc); + wc.c.handle(new ContainerEvent(wc.c.getContainerId(), + ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP)); + assertEquals(0, metrics.getRunningContainers()); } finally { if (wc != null) { wc.finished();