From 2aed48a67f408c290e90c83af9f76165d695f91a Mon Sep 17 00:00:00 2001 From: Bikas Saha Date: Tue, 26 Mar 2013 02:34:27 +0000 Subject: [PATCH] YARN-498. Unmanaged AM launcher does not set various constants in env for an AM, also does not handle failed AMs properly (Hitesh Shah via bikas) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1460954 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../distributedshell/ApplicationMaster.java | 22 ++++++-- .../UnmanagedAMLauncher.java | 56 +++++++++++++++++-- .../TestUnmanagedAMLauncher.java | 40 ++++++++++++- 4 files changed, 111 insertions(+), 10 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5a6dda9aad3..f8a4937922a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -136,6 +136,9 @@ Release 2.0.5-beta - UNRELEASED YARN-378. Fix RM to make the AM max attempts/retries to be configurable per application by clients. (Zhijie Shen via vinodkv) + YARN-498. Unmanaged AM launcher does not set various constants in env for + an AM, also does not handle failed AMs properly. (Hitesh Shah via bikas) + Release 2.0.4-alpha - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java index 851bad584cf..1e65a9a73bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/ApplicationMaster.java @@ -319,10 +319,7 @@ public boolean init(String[] args) throws ParseException, IOException { Map envs = System.getenv(); - if (envs.containsKey(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV)) { - appAttemptID = ConverterUtils.toApplicationAttemptId(envs - .get(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV)); - } else if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) { + if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) { if (cliParser.hasOption("app_attempt_id")) { String appIdStr = cliParser.getOptionValue("app_attempt_id", ""); appAttemptID = ConverterUtils.toApplicationAttemptId(appIdStr); @@ -336,6 +333,23 @@ public boolean init(String[] args) throws ParseException, IOException { appAttemptID = containerId.getApplicationAttemptId(); } + if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) { + throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV + + " not set in the environment"); + } + if (!envs.containsKey(ApplicationConstants.NM_HOST_ENV)) { + throw new RuntimeException(ApplicationConstants.NM_HOST_ENV + + " not set in the environment"); + } + if (!envs.containsKey(ApplicationConstants.NM_HTTP_PORT_ENV)) { + throw new RuntimeException(ApplicationConstants.NM_HTTP_PORT_ENV + + " not set in the environment"); + } + if (!envs.containsKey(ApplicationConstants.NM_PORT_ENV)) { + throw new RuntimeException(ApplicationConstants.NM_PORT_ENV + + " not set in the environment"); + } + LOG.info("Application master for app" + ", appId=" + appAttemptID.getApplicationId().getId() + ", clustertimestamp=" + appAttemptID.getApplicationId().getClusterTimestamp() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java index b9e06256a82..62ac67fc170 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/main/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/UnmanagedAMLauncher.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStreamReader; +import java.net.InetAddress; import java.util.ArrayList; import java.util.EnumSet; import java.util.Map; @@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; @@ -81,6 +83,8 @@ public class UnmanagedAMLauncher { // set the classpath explicitly private String classpath = null; + private volatile boolean amCompleted = false; + /** * @param args * Command line arguments @@ -179,8 +183,18 @@ public void launchAM(ApplicationAttemptId attemptId) throws IOException { if(!setClasspath && classpath!=null) { envAMList.add("CLASSPATH="+classpath); } - - envAMList.add(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV + "=" + attemptId); + + ContainerId containerId = Records.newRecord(ContainerId.class); + containerId.setApplicationAttemptId(attemptId); + containerId.setId(0); + + String hostname = InetAddress.getLocalHost().getHostName(); + envAMList.add(ApplicationConstants.AM_CONTAINER_ID_ENV + "=" + containerId); + envAMList.add(ApplicationConstants.NM_HOST_ENV + "=" + hostname); + envAMList.add(ApplicationConstants.NM_HTTP_PORT_ENV + "=0"); + envAMList.add(ApplicationConstants.NM_PORT_ENV + "=0"); + envAMList.add(ApplicationConstants.APP_SUBMIT_TIME_ENV + "=" + + System.currentTimeMillis()); String[] envAM = new String[envAMList.size()]; Process amProc = Runtime.getRuntime().exec(amCmd, envAMList.toArray(envAM)); @@ -233,8 +247,10 @@ public void run() { LOG.info("AM process exited with value: " + exitCode); } catch (InterruptedException e) { e.printStackTrace(); + } finally { + amCompleted = true; } - + try { // make sure that the error thread exits // on Windows these threads sometimes get stuck and hang the execution @@ -306,6 +322,7 @@ public boolean run() throws IOException { appReport = monitorApplication(appId, EnumSet.of( YarnApplicationState.KILLED, YarnApplicationState.FAILED, YarnApplicationState.FINISHED)); + YarnApplicationState appState = appReport.getYarnApplicationState(); FinalApplicationStatus appStatus = appReport.getFinalApplicationStatus(); @@ -341,6 +358,19 @@ public boolean run() throws IOException { private ApplicationReport monitorApplication(ApplicationId appId, Set finalState) throws YarnRemoteException { + long foundAMCompletedTime = 0; + final int timeToWaitMS = 10000; + StringBuilder expectedFinalState = new StringBuilder(); + boolean first = true; + for (YarnApplicationState state : finalState) { + if (first) { + first = false; + expectedFinalState.append(state.name()); + } else { + expectedFinalState.append("," + state.name()); + } + } + while (true) { // Check app status every 1 second. @@ -370,8 +400,24 @@ private ApplicationReport monitorApplication(ApplicationId appId, return report; } + // wait for 10 seconds after process has completed for app report to + // come back + if (amCompleted) { + if (foundAMCompletedTime == 0) { + foundAMCompletedTime = System.currentTimeMillis(); + } else if ((System.currentTimeMillis() - foundAMCompletedTime) + > timeToWaitMS) { + LOG.warn("Waited " + timeToWaitMS/1000 + + " seconds after process completed for AppReport" + + " to reach desired final state. Not waiting anymore." + + "CurrentState = " + state + + ", ExpectedStates = " + expectedFinalState.toString()); + throw new RuntimeException("Failed to receive final expected state" + + " in ApplicationReport" + + ", CurrentState=" + state + + ", ExpectedStates=" + expectedFinalState.toString()); + } + } } - } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java index 3e6e126da1e..43fec24afca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java @@ -18,6 +18,8 @@ package org.apache.hadoop.yarn.applications.unmanagedamlauncher; +import static org.junit.Assert.fail; + import java.io.File; import java.io.FileOutputStream; import java.io.IOException; @@ -99,7 +101,7 @@ public void testDSShell() throws Exception { LOG.fatal("JAVA_HOME not defined. Test not running."); return; } - // start dist-shell with 0 containers because container launch will fail if + // start dist-shell with 0 containers because container launch will fail if // there are no dist cache resources. String[] args = { "--classpath", @@ -125,4 +127,40 @@ public void testDSShell() throws Exception { } + @Test(timeout=30000) + public void testDSShellError() throws Exception { + String classpath = getTestRuntimeClasspath(); + String javaHome = System.getenv("JAVA_HOME"); + if (javaHome == null) { + LOG.fatal("JAVA_HOME not defined. Test not running."); + return; + } + + // remove shell command to make dist-shell fail in initialization itself + String[] args = { + "--classpath", + classpath, + "--queue", + "default", + "--cmd", + javaHome + + "/bin/java -Xmx512m " + + "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster " + + "--container_memory 128 --num_containers 1 --priority 0" }; + + LOG.info("Initializing Launcher"); + UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration( + yarnCluster.getConfig())); + boolean initSuccess = launcher.init(args); + Assert.assertTrue(initSuccess); + LOG.info("Running Launcher"); + + try { + launcher.run(); + fail("Expected an exception to occur as launch should have failed"); + } catch (RuntimeException e) { + // Expected + } + } + }