From 0623ee954a0e6f0d7989f01e44da8fc0df7c989a Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 16 Jul 2013 23:45:29 +0000 Subject: [PATCH] YARN-820. Fixed an invalid state transition in NodeManager caused by failing resource localization. Contributed by Mayank Bansal. svn merge --ignore-ancestry -c 1503947 ../../trunk/ git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1503948 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../container/ContainerImpl.java | 5 +++ .../localizer/LocalizedResource.java | 15 ++++++--- .../container/TestContainer.java | 31 +++++++++++++++++++ 4 files changed, 49 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 695d008a6f1..aaed346e175 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -48,6 +48,9 @@ Release 2.1.1-beta - UNRELEASED YARN-661. Fixed NM to cleanup users' local directories correctly when starting up. (Omkar Vinit Joshi via vinodkv) + YARN-820. Fixed an invalid state transition in NodeManager caused by failing + resource localization. (Mayank Bansal via vinodkv) + Release 2.1.0-beta - 2013-07-02 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 68d52e762e9..c1f3eb95f46 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -290,6 +290,11 @@ public class ContainerImpl implements Container { .addTransition(ContainerState.DONE, ContainerState.DONE, ContainerEventType.UPDATE_DIAGNOSTICS_MSG, UPDATE_DIAGNOSTICS_TRANSITION) + // This transition may result when + // we notify container of failed localization if localizer thread (for + // that container) fails for some reason + .addTransition(ContainerState.DONE, ContainerState.DONE, + ContainerEventType.RESOURCE_FAILED) // create the topology tables .installTopology(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java index 7c22d9fa9f9..f49e942ffd2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalizedResource.java @@ -112,12 +112,17 @@ public class LocalizedResource implements EventHandler { .append(getState() == ResourceState.LOCALIZED ? getLocalPath() + "," + getSize() : "pending").append(",["); - for (ContainerId c : ref) { - sb.append("(").append(c.toString()).append(")"); + try { + this.readLock.lock(); + for (ContainerId c : ref) { + sb.append("(").append(c.toString()).append(")"); + } + sb.append("],").append(getTimestamp()).append(",").append(getState()) + .append("}"); + return sb.toString(); + } finally { + this.readLock.unlock(); } - sb.append("],").append(getTimestamp()).append(",") - .append(getState()).append("}"); - return sb.toString(); } private void release(ContainerId container) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java index b7ee398b04f..d5193071fc7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/TestContainer.java @@ -240,6 +240,32 @@ public class TestContainer { } } } + + @Test + @SuppressWarnings("unchecked") + // mocked generic + public void testLocalizationFailureAtDone() throws Exception { + WrappedContainer wc = null; + try { + wc = new WrappedContainer(6, 314159265358979L, 4344, "yak"); + wc.initContainer(); + wc.localizeResources(); + wc.launchContainer(); + reset(wc.localizerBus); + wc.containerSuccessful(); + wc.containerResourcesCleanup(); + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + // Now in DONE, issue RESOURCE_FAILED as done by LocalizeRunner + wc.resourceFailedContainer(); + // Verify still in DONE + assertEquals(ContainerState.DONE, wc.c.getContainerState()); + verifyCleanupCall(wc); + } finally { + if (wc != null) { + wc.finished(); + } + } + } @Test @SuppressWarnings("unchecked") // mocked generic @@ -624,6 +650,11 @@ public class TestContainer { drainDispatcherEvents(); } + public void resourceFailedContainer() { + c.handle(new ContainerEvent(cId, ContainerEventType.RESOURCE_FAILED)); + drainDispatcherEvents(); + } + // Localize resources // Skip some resources so as to consider them failed public Map> doLocalizeResources(