YARN-498. Unmanaged AM launcher does not set various constants in env for an AM, also does not handle failed AMs properly (Hitesh Shah via bikas)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1460954 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Bikas Saha 2013-03-26 02:34:27 +00:00
parent 46315a2d91
commit 2aed48a67f
4 changed files with 111 additions and 10 deletions

View File

@ -136,6 +136,9 @@ Release 2.0.5-beta - UNRELEASED
YARN-378. Fix RM to make the AM max attempts/retries to be configurable YARN-378. Fix RM to make the AM max attempts/retries to be configurable
per application by clients. (Zhijie Shen via vinodkv) per application by clients. (Zhijie Shen via vinodkv)
YARN-498. Unmanaged AM launcher does not set various constants in env for
an AM, also does not handle failed AMs properly. (Hitesh Shah via bikas)
Release 2.0.4-alpha - UNRELEASED Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -319,10 +319,7 @@ public class ApplicationMaster {
Map<String, String> envs = System.getenv(); Map<String, String> envs = System.getenv();
if (envs.containsKey(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV)) { if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) {
appAttemptID = ConverterUtils.toApplicationAttemptId(envs
.get(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV));
} else if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) {
if (cliParser.hasOption("app_attempt_id")) { if (cliParser.hasOption("app_attempt_id")) {
String appIdStr = cliParser.getOptionValue("app_attempt_id", ""); String appIdStr = cliParser.getOptionValue("app_attempt_id", "");
appAttemptID = ConverterUtils.toApplicationAttemptId(appIdStr); appAttemptID = ConverterUtils.toApplicationAttemptId(appIdStr);
@ -336,6 +333,23 @@ public class ApplicationMaster {
appAttemptID = containerId.getApplicationAttemptId(); appAttemptID = containerId.getApplicationAttemptId();
} }
if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) {
throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV
+ " not set in the environment");
}
if (!envs.containsKey(ApplicationConstants.NM_HOST_ENV)) {
throw new RuntimeException(ApplicationConstants.NM_HOST_ENV
+ " not set in the environment");
}
if (!envs.containsKey(ApplicationConstants.NM_HTTP_PORT_ENV)) {
throw new RuntimeException(ApplicationConstants.NM_HTTP_PORT_ENV
+ " not set in the environment");
}
if (!envs.containsKey(ApplicationConstants.NM_PORT_ENV)) {
throw new RuntimeException(ApplicationConstants.NM_PORT_ENV
+ " not set in the environment");
}
LOG.info("Application master for app" + ", appId=" LOG.info("Application master for app" + ", appId="
+ appAttemptID.getApplicationId().getId() + ", clustertimestamp=" + appAttemptID.getApplicationId().getId() + ", clustertimestamp="
+ appAttemptID.getApplicationId().getClusterTimestamp() + appAttemptID.getApplicationId().getClusterTimestamp()

View File

@ -22,6 +22,7 @@ import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.InetAddress;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.EnumSet; import java.util.EnumSet;
import java.util.Map; import java.util.Map;
@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Priority;
@ -81,6 +83,8 @@ public class UnmanagedAMLauncher {
// set the classpath explicitly // set the classpath explicitly
private String classpath = null; private String classpath = null;
private volatile boolean amCompleted = false;
/** /**
* @param args * @param args
* Command line arguments * Command line arguments
@ -179,8 +183,18 @@ public class UnmanagedAMLauncher {
if(!setClasspath && classpath!=null) { if(!setClasspath && classpath!=null) {
envAMList.add("CLASSPATH="+classpath); envAMList.add("CLASSPATH="+classpath);
} }
envAMList.add(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV + "=" + attemptId); ContainerId containerId = Records.newRecord(ContainerId.class);
containerId.setApplicationAttemptId(attemptId);
containerId.setId(0);
String hostname = InetAddress.getLocalHost().getHostName();
envAMList.add(ApplicationConstants.AM_CONTAINER_ID_ENV + "=" + containerId);
envAMList.add(ApplicationConstants.NM_HOST_ENV + "=" + hostname);
envAMList.add(ApplicationConstants.NM_HTTP_PORT_ENV + "=0");
envAMList.add(ApplicationConstants.NM_PORT_ENV + "=0");
envAMList.add(ApplicationConstants.APP_SUBMIT_TIME_ENV + "="
+ System.currentTimeMillis());
String[] envAM = new String[envAMList.size()]; String[] envAM = new String[envAMList.size()];
Process amProc = Runtime.getRuntime().exec(amCmd, envAMList.toArray(envAM)); Process amProc = Runtime.getRuntime().exec(amCmd, envAMList.toArray(envAM));
@ -233,8 +247,10 @@ public class UnmanagedAMLauncher {
LOG.info("AM process exited with value: " + exitCode); LOG.info("AM process exited with value: " + exitCode);
} catch (InterruptedException e) { } catch (InterruptedException e) {
e.printStackTrace(); e.printStackTrace();
} finally {
amCompleted = true;
} }
try { try {
// make sure that the error thread exits // make sure that the error thread exits
// on Windows these threads sometimes get stuck and hang the execution // on Windows these threads sometimes get stuck and hang the execution
@ -306,6 +322,7 @@ public class UnmanagedAMLauncher {
appReport = monitorApplication(appId, EnumSet.of( appReport = monitorApplication(appId, EnumSet.of(
YarnApplicationState.KILLED, YarnApplicationState.FAILED, YarnApplicationState.KILLED, YarnApplicationState.FAILED,
YarnApplicationState.FINISHED)); YarnApplicationState.FINISHED));
YarnApplicationState appState = appReport.getYarnApplicationState(); YarnApplicationState appState = appReport.getYarnApplicationState();
FinalApplicationStatus appStatus = appReport.getFinalApplicationStatus(); FinalApplicationStatus appStatus = appReport.getFinalApplicationStatus();
@ -341,6 +358,19 @@ public class UnmanagedAMLauncher {
private ApplicationReport monitorApplication(ApplicationId appId, private ApplicationReport monitorApplication(ApplicationId appId,
Set<YarnApplicationState> finalState) throws YarnRemoteException { Set<YarnApplicationState> finalState) throws YarnRemoteException {
long foundAMCompletedTime = 0;
final int timeToWaitMS = 10000;
StringBuilder expectedFinalState = new StringBuilder();
boolean first = true;
for (YarnApplicationState state : finalState) {
if (first) {
first = false;
expectedFinalState.append(state.name());
} else {
expectedFinalState.append("," + state.name());
}
}
while (true) { while (true) {
// Check app status every 1 second. // Check app status every 1 second.
@ -370,8 +400,24 @@ public class UnmanagedAMLauncher {
return report; return report;
} }
// wait for 10 seconds after process has completed for app report to
// come back
if (amCompleted) {
if (foundAMCompletedTime == 0) {
foundAMCompletedTime = System.currentTimeMillis();
} else if ((System.currentTimeMillis() - foundAMCompletedTime)
> timeToWaitMS) {
LOG.warn("Waited " + timeToWaitMS/1000
+ " seconds after process completed for AppReport"
+ " to reach desired final state. Not waiting anymore."
+ "CurrentState = " + state
+ ", ExpectedStates = " + expectedFinalState.toString());
throw new RuntimeException("Failed to receive final expected state"
+ " in ApplicationReport"
+ ", CurrentState=" + state
+ ", ExpectedStates=" + expectedFinalState.toString());
}
}
} }
} }
} }

View File

@ -18,6 +18,8 @@
package org.apache.hadoop.yarn.applications.unmanagedamlauncher; package org.apache.hadoop.yarn.applications.unmanagedamlauncher;
import static org.junit.Assert.fail;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
@ -99,7 +101,7 @@ public class TestUnmanagedAMLauncher {
LOG.fatal("JAVA_HOME not defined. Test not running."); LOG.fatal("JAVA_HOME not defined. Test not running.");
return; return;
} }
// start dist-shell with 0 containers because container launch will fail if // start dist-shell with 0 containers because container launch will fail if
// there are no dist cache resources. // there are no dist cache resources.
String[] args = { String[] args = {
"--classpath", "--classpath",
@ -125,4 +127,40 @@ public class TestUnmanagedAMLauncher {
} }
@Test(timeout=30000)
public void testDSShellError() throws Exception {
String classpath = getTestRuntimeClasspath();
String javaHome = System.getenv("JAVA_HOME");
if (javaHome == null) {
LOG.fatal("JAVA_HOME not defined. Test not running.");
return;
}
// remove shell command to make dist-shell fail in initialization itself
String[] args = {
"--classpath",
classpath,
"--queue",
"default",
"--cmd",
javaHome
+ "/bin/java -Xmx512m "
+ "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster "
+ "--container_memory 128 --num_containers 1 --priority 0" };
LOG.info("Initializing Launcher");
UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration(
yarnCluster.getConfig()));
boolean initSuccess = launcher.init(args);
Assert.assertTrue(initSuccess);
LOG.info("Running Launcher");
try {
launcher.run();
fail("Expected an exception to occur as launch should have failed");
} catch (RuntimeException e) {
// Expected
}
}
} }