From 4bff2df13771ca90bc8452b19ae1ba014862b9b4 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Fri, 20 Apr 2018 13:22:20 -0500 Subject: [PATCH] YARN-7786. NullPointerException while launching ApplicationMaster. Contributed by lujie (cherry picked from commit 766544c0b008da9e78bcea6285b2c478653df75a) --- .../amlauncher/AMLauncher.java | 21 +++-- .../TestApplicationMasterLauncher.java | 80 ++++++++++++++++--- 2 files changed, 83 insertions(+), 18 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index 7051f8cca26..d73b3a89515 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -105,7 +105,7 @@ public class AMLauncher implements Runnable { connect(); ContainerId masterContainerID = masterContainer.getId(); ApplicationSubmissionContext applicationContext = - application.getSubmissionContext(); + application.getSubmissionContext(); LOG.info("Setting up container " + masterContainer + " for AM " + application.getAppAttemptId()); ContainerLaunchContext launchContext = @@ -189,6 +189,10 @@ public class AMLauncher implements Runnable { ContainerLaunchContext container = applicationMasterContext.getAMContainerSpec(); + if (container == null){ + throw new IOException(containerID + + " has been cleaned before launched"); + } // Finalize the container setupTokens(container, containerID); // set the flow context optionally for timeline service v.2 @@ -305,11 +309,7 @@ public class AMLauncher implements Runnable { handler.handle(new RMAppAttemptEvent(application.getAppAttemptId(), RMAppAttemptEventType.LAUNCHED)); } catch(Exception ie) { - String message = "Error launching " + application.getAppAttemptId() - + ". Got exception: " + StringUtils.stringifyException(ie); - LOG.info(message); - handler.handle(new RMAppAttemptEvent(application - .getAppAttemptId(), RMAppAttemptEventType.LAUNCH_FAILED, message)); + onAMLaunchFailed(masterContainer.getId(), ie); } break; case CLEANUP: @@ -344,4 +344,13 @@ public class AMLauncher implements Runnable { throw (IOException) t; } } + + @SuppressWarnings("unchecked") + protected void onAMLaunchFailed(ContainerId containerId, Exception ie) { + String message = "Error launching " + application.getAppAttemptId() + + ". Got exception: " + StringUtils.stringifyException(ie); + LOG.info(message); + handler.handle(new RMAppAttemptEvent(application + .getAppAttemptId(), RMAppAttemptEventType.LAUNCH_FAILED, message)); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java index 172993b8176..e518b9083af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -24,12 +24,14 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; @@ -73,6 +75,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncher; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.utils.AMRMClientUtils; @@ -83,6 +86,9 @@ import org.apache.log4j.Logger; import org.junit.Assert; import org.junit.Test; +import com.google.common.base.Supplier; + +import static org.junit.Assert.fail; import static org.mockito.Matchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -216,10 +222,14 @@ public class TestApplicationMasterLauncher { // kick the scheduling nm1.nodeHeartbeat(true); - int waitCount = 0; - while (containerManager.launched == false && waitCount++ < 20) { - LOG.info("Waiting for AM Launch to happen.."); - Thread.sleep(1000); + try { + GenericTestUtils.waitFor(new Supplier() { + @Override public Boolean get() { + return containerManager.launched; + } + }, 100, 200 * 100); + } catch (TimeoutException e) { + fail("timed out while waiting for AM Launch to happen."); } Assert.assertTrue(containerManager.launched); @@ -233,7 +243,7 @@ public class TestApplicationMasterLauncher { .getMasterContainer().getId() .toString(), containerManager.containerIdAtContainerManager); Assert.assertEquals(nm1.getNodeId().toString(), - containerManager.nmHostAtContainerManager); + containerManager.nmHostAtContainerManager); Assert.assertEquals(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS, containerManager.maxAppAttempts); @@ -246,10 +256,14 @@ public class TestApplicationMasterLauncher { nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, ContainerState.COMPLETE); rm.waitForState(am.getApplicationAttemptId(), RMAppAttemptState.FINISHED); - waitCount = 0; - while (containerManager.cleanedup == false && waitCount++ < 20) { - LOG.info("Waiting for AM Cleanup to happen.."); - Thread.sleep(1000); + try { + GenericTestUtils.waitFor(new Supplier() { + @Override public Boolean get() { + return containerManager.cleanedup; + } + }, 100, 200 * 100); + } catch (TimeoutException e) { + fail("timed out while waiting for AM cleanup to happen."); } Assert.assertTrue(containerManager.cleanedup); @@ -257,6 +271,48 @@ public class TestApplicationMasterLauncher { rm.stop(); } + @Test + public void testAMCleanupBeforeLaunch() throws Exception { + MockRM rm = new MockRM(); + rm.start(); + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5120); + RMApp app = rm.submitApp(2000); + // kick the scheduling + nm1.nodeHeartbeat(true); + RMAppAttempt attempt = app.getCurrentAppAttempt(); + + try { + GenericTestUtils.waitFor(new Supplier() { + @Override public Boolean get() { + return attempt.getMasterContainer() != null; + } + }, 10, 200 * 100); + } catch (TimeoutException e) { + fail("timed out while waiting for AM Launch to happen."); + } + + //send kill before launch + rm.killApp(app.getApplicationId()); + rm.waitForState(app.getApplicationId(), RMAppState.KILLED); + //Launch after kill + AMLauncher launcher = new AMLauncher(rm.getRMContext(), + attempt, AMLauncherEventType.LAUNCH, rm.getConfig()) { + @Override + public void onAMLaunchFailed(ContainerId containerId, Exception e) { + Assert.assertFalse("NullPointerException happens " + + " while launching " + containerId, + e instanceof NullPointerException); + } + @Override + protected ContainerManagementProtocol getContainerMgrProxy( + ContainerId containerId) { + return new MyContainerManagerImpl(); + } + }; + launcher.run(); + rm.stop(); + } + @Test public void testRetriesOnFailures() throws Exception { final ContainerManagementProtocol mockProxy = @@ -303,7 +359,7 @@ public class TestApplicationMasterLauncher { rm.drainEvents(); MockRM.waitForState(app.getCurrentAppAttempt(), - RMAppAttemptState.LAUNCHED, 500); + RMAppAttemptState.LAUNCHED, 500); } @@ -337,9 +393,9 @@ public class TestApplicationMasterLauncher { AllocateResponse amrs = null; try { - amrs = am.allocate(new ArrayList(), + amrs = am.allocate(new ArrayList(), new ArrayList()); - Assert.fail(); + Assert.fail(); } catch (ApplicationMasterNotRegisteredException e) { }