YARN-498. Unmanaged AM launcher does not set various constants in env for an AM, also does not handle failed AMs properly (Hitesh Shah via bikas)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1460954 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
46315a2d91
commit
2aed48a67f
|
@ -136,6 +136,9 @@ Release 2.0.5-beta - UNRELEASED
|
|||
YARN-378. Fix RM to make the AM max attempts/retries to be configurable
|
||||
per application by clients. (Zhijie Shen via vinodkv)
|
||||
|
||||
YARN-498. Unmanaged AM launcher does not set various constants in env for
|
||||
an AM, also does not handle failed AMs properly. (Hitesh Shah via bikas)
|
||||
|
||||
Release 2.0.4-alpha - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -319,10 +319,7 @@ public class ApplicationMaster {
|
|||
|
||||
Map<String, String> envs = System.getenv();
|
||||
|
||||
if (envs.containsKey(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV)) {
|
||||
appAttemptID = ConverterUtils.toApplicationAttemptId(envs
|
||||
.get(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV));
|
||||
} else if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) {
|
||||
if (!envs.containsKey(ApplicationConstants.AM_CONTAINER_ID_ENV)) {
|
||||
if (cliParser.hasOption("app_attempt_id")) {
|
||||
String appIdStr = cliParser.getOptionValue("app_attempt_id", "");
|
||||
appAttemptID = ConverterUtils.toApplicationAttemptId(appIdStr);
|
||||
|
@ -336,6 +333,23 @@ public class ApplicationMaster {
|
|||
appAttemptID = containerId.getApplicationAttemptId();
|
||||
}
|
||||
|
||||
if (!envs.containsKey(ApplicationConstants.APP_SUBMIT_TIME_ENV)) {
|
||||
throw new RuntimeException(ApplicationConstants.APP_SUBMIT_TIME_ENV
|
||||
+ " not set in the environment");
|
||||
}
|
||||
if (!envs.containsKey(ApplicationConstants.NM_HOST_ENV)) {
|
||||
throw new RuntimeException(ApplicationConstants.NM_HOST_ENV
|
||||
+ " not set in the environment");
|
||||
}
|
||||
if (!envs.containsKey(ApplicationConstants.NM_HTTP_PORT_ENV)) {
|
||||
throw new RuntimeException(ApplicationConstants.NM_HTTP_PORT_ENV
|
||||
+ " not set in the environment");
|
||||
}
|
||||
if (!envs.containsKey(ApplicationConstants.NM_PORT_ENV)) {
|
||||
throw new RuntimeException(ApplicationConstants.NM_PORT_ENV
|
||||
+ " not set in the environment");
|
||||
}
|
||||
|
||||
LOG.info("Application master for app" + ", appId="
|
||||
+ appAttemptID.getApplicationId().getId() + ", clustertimestamp="
|
||||
+ appAttemptID.getApplicationId().getClusterTimestamp()
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.BufferedReader;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.InetAddress;
|
||||
import java.util.ArrayList;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Map;
|
||||
|
@ -41,6 +42,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
|||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
|
@ -81,6 +83,8 @@ public class UnmanagedAMLauncher {
|
|||
// set the classpath explicitly
|
||||
private String classpath = null;
|
||||
|
||||
private volatile boolean amCompleted = false;
|
||||
|
||||
/**
|
||||
* @param args
|
||||
* Command line arguments
|
||||
|
@ -179,8 +183,18 @@ public class UnmanagedAMLauncher {
|
|||
if(!setClasspath && classpath!=null) {
|
||||
envAMList.add("CLASSPATH="+classpath);
|
||||
}
|
||||
|
||||
envAMList.add(ApplicationConstants.AM_APP_ATTEMPT_ID_ENV + "=" + attemptId);
|
||||
|
||||
ContainerId containerId = Records.newRecord(ContainerId.class);
|
||||
containerId.setApplicationAttemptId(attemptId);
|
||||
containerId.setId(0);
|
||||
|
||||
String hostname = InetAddress.getLocalHost().getHostName();
|
||||
envAMList.add(ApplicationConstants.AM_CONTAINER_ID_ENV + "=" + containerId);
|
||||
envAMList.add(ApplicationConstants.NM_HOST_ENV + "=" + hostname);
|
||||
envAMList.add(ApplicationConstants.NM_HTTP_PORT_ENV + "=0");
|
||||
envAMList.add(ApplicationConstants.NM_PORT_ENV + "=0");
|
||||
envAMList.add(ApplicationConstants.APP_SUBMIT_TIME_ENV + "="
|
||||
+ System.currentTimeMillis());
|
||||
|
||||
String[] envAM = new String[envAMList.size()];
|
||||
Process amProc = Runtime.getRuntime().exec(amCmd, envAMList.toArray(envAM));
|
||||
|
@ -233,8 +247,10 @@ public class UnmanagedAMLauncher {
|
|||
LOG.info("AM process exited with value: " + exitCode);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
amCompleted = true;
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
// make sure that the error thread exits
|
||||
// on Windows these threads sometimes get stuck and hang the execution
|
||||
|
@ -306,6 +322,7 @@ public class UnmanagedAMLauncher {
|
|||
appReport = monitorApplication(appId, EnumSet.of(
|
||||
YarnApplicationState.KILLED, YarnApplicationState.FAILED,
|
||||
YarnApplicationState.FINISHED));
|
||||
|
||||
YarnApplicationState appState = appReport.getYarnApplicationState();
|
||||
FinalApplicationStatus appStatus = appReport.getFinalApplicationStatus();
|
||||
|
||||
|
@ -341,6 +358,19 @@ public class UnmanagedAMLauncher {
|
|||
private ApplicationReport monitorApplication(ApplicationId appId,
|
||||
Set<YarnApplicationState> finalState) throws YarnRemoteException {
|
||||
|
||||
long foundAMCompletedTime = 0;
|
||||
final int timeToWaitMS = 10000;
|
||||
StringBuilder expectedFinalState = new StringBuilder();
|
||||
boolean first = true;
|
||||
for (YarnApplicationState state : finalState) {
|
||||
if (first) {
|
||||
first = false;
|
||||
expectedFinalState.append(state.name());
|
||||
} else {
|
||||
expectedFinalState.append("," + state.name());
|
||||
}
|
||||
}
|
||||
|
||||
while (true) {
|
||||
|
||||
// Check app status every 1 second.
|
||||
|
@ -370,8 +400,24 @@ public class UnmanagedAMLauncher {
|
|||
return report;
|
||||
}
|
||||
|
||||
// wait for 10 seconds after process has completed for app report to
|
||||
// come back
|
||||
if (amCompleted) {
|
||||
if (foundAMCompletedTime == 0) {
|
||||
foundAMCompletedTime = System.currentTimeMillis();
|
||||
} else if ((System.currentTimeMillis() - foundAMCompletedTime)
|
||||
> timeToWaitMS) {
|
||||
LOG.warn("Waited " + timeToWaitMS/1000
|
||||
+ " seconds after process completed for AppReport"
|
||||
+ " to reach desired final state. Not waiting anymore."
|
||||
+ "CurrentState = " + state
|
||||
+ ", ExpectedStates = " + expectedFinalState.toString());
|
||||
throw new RuntimeException("Failed to receive final expected state"
|
||||
+ " in ApplicationReport"
|
||||
+ ", CurrentState=" + state
|
||||
+ ", ExpectedStates=" + expectedFinalState.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
|
||||
package org.apache.hadoop.yarn.applications.unmanagedamlauncher;
|
||||
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
@ -99,7 +101,7 @@ public class TestUnmanagedAMLauncher {
|
|||
LOG.fatal("JAVA_HOME not defined. Test not running.");
|
||||
return;
|
||||
}
|
||||
// start dist-shell with 0 containers because container launch will fail if
|
||||
// start dist-shell with 0 containers because container launch will fail if
|
||||
// there are no dist cache resources.
|
||||
String[] args = {
|
||||
"--classpath",
|
||||
|
@ -125,4 +127,40 @@ public class TestUnmanagedAMLauncher {
|
|||
|
||||
}
|
||||
|
||||
/**
 * Verifies that the launcher surfaces a failure when the AM it starts fails.
 *
 * The AM command line built here names the distributed-shell
 * ApplicationMaster but passes no shell command, which (per the inline note
 * below) is meant to make the AM fail during its own initialization.
 * launcher.init(args) is still expected to succeed; launcher.run() is then
 * expected to throw a RuntimeException. If run() returns normally the test
 * fails explicitly.
 *
 * The test returns early without asserting anything when JAVA_HOME is not
 * set in the environment, since the AM command is built from it.
 *
 * @throws Exception declared so that any unexpected checked exception from
 *         the launcher fails the test
 */
@Test(timeout=30000)
|
||||
public void testDSShellError() throws Exception {
|
||||
String classpath = getTestRuntimeClasspath();
|
||||
String javaHome = System.getenv("JAVA_HOME");
|
||||
if (javaHome == null) {
|
||||
LOG.fatal("JAVA_HOME not defined. Test not running.");
|
||||
return;
|
||||
}
|
||||
|
||||
// remove shell command to make dist-shell fail in initialization itself
|
||||
String[] args = {
|
||||
"--classpath",
|
||||
classpath,
|
||||
"--queue",
|
||||
"default",
|
||||
"--cmd",
|
||||
javaHome
|
||||
+ "/bin/java -Xmx512m "
|
||||
+ "org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster "
|
||||
+ "--container_memory 128 --num_containers 1 --priority 0" };
|
||||
|
||||
LOG.info("Initializing Launcher");
|
||||
UnmanagedAMLauncher launcher = new UnmanagedAMLauncher(new Configuration(
|
||||
yarnCluster.getConfig()));
|
||||
boolean initSuccess = launcher.init(args);
|
||||
Assert.assertTrue(initSuccess);
|
||||
LOG.info("Running Launcher");
|
||||
|
||||
try {
|
||||
launcher.run();
|
||||
fail("Expected an exception to occur as launch should have failed");
|
||||
} catch (RuntimeException e) {
|
||||
// Expected: run() must throw when the AM fails; the exception is
|
||||
// intentionally not inspected further.
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue