YARN-10536. Client in distributedShell swallows interrupt exceptions (#2554)
This commit is contained in:
parent
c2672bb234
commit
7a88f45366
|
@ -144,6 +144,9 @@ public class Client {
|
||||||
private static final int DEFAULT_CONTAINER_MEMORY = 10;
|
private static final int DEFAULT_CONTAINER_MEMORY = 10;
|
||||||
private static final int DEFAULT_CONTAINER_VCORES = 1;
|
private static final int DEFAULT_CONTAINER_VCORES = 1;
|
||||||
|
|
||||||
|
// check the application once per second.
|
||||||
|
private static final int APP_MONITOR_INTERVAL = 1000;
|
||||||
|
|
||||||
// Configuration
|
// Configuration
|
||||||
private Configuration conf;
|
private Configuration conf;
|
||||||
private YarnClient yarnClient;
|
private YarnClient yarnClient;
|
||||||
|
@ -209,7 +212,7 @@ public class Client {
|
||||||
private String rollingFilesPattern = "";
|
private String rollingFilesPattern = "";
|
||||||
|
|
||||||
// Start time for client
|
// Start time for client
|
||||||
private final long clientStartTime = System.currentTimeMillis();
|
private long clientStartTime = System.currentTimeMillis();
|
||||||
// Timeout threshold for client. Kill app after time interval expires.
|
// Timeout threshold for client. Kill app after time interval expires.
|
||||||
private long clientTimeout = 600000;
|
private long clientTimeout = 600000;
|
||||||
|
|
||||||
|
@ -670,6 +673,8 @@ public class Client {
|
||||||
|
|
||||||
LOG.info("Running Client");
|
LOG.info("Running Client");
|
||||||
yarnClient.start();
|
yarnClient.start();
|
||||||
|
// set the client start time.
|
||||||
|
clientStartTime = System.currentTimeMillis();
|
||||||
|
|
||||||
YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
|
YarnClusterMetrics clusterMetrics = yarnClient.getYarnClusterMetrics();
|
||||||
LOG.info("Got Cluster metric info from ASM"
|
LOG.info("Got Cluster metric info from ASM"
|
||||||
|
@ -983,7 +988,6 @@ public class Client {
|
||||||
if (keepContainers) {
|
if (keepContainers) {
|
||||||
vargs.add("--keep_containers_across_application_attempts");
|
vargs.add("--keep_containers_across_application_attempts");
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
|
for (Map.Entry<String, String> entry : shellEnv.entrySet()) {
|
||||||
vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
|
vargs.add("--shell_env " + entry.getKey() + "=" + entry.getValue());
|
||||||
}
|
}
|
||||||
|
@ -1110,13 +1114,17 @@ public class Client {
|
||||||
private boolean monitorApplication(ApplicationId appId)
|
private boolean monitorApplication(ApplicationId appId)
|
||||||
throws YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
|
|
||||||
|
boolean res = false;
|
||||||
|
boolean needForceKill = false;
|
||||||
while (true) {
|
while (true) {
|
||||||
|
|
||||||
// Check app status every 1 second.
|
// Check app status every 1 second.
|
||||||
try {
|
try {
|
||||||
Thread.sleep(1000);
|
Thread.sleep(APP_MONITOR_INTERVAL);
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
LOG.debug("Thread sleep in monitoring loop interrupted");
|
LOG.warn("Thread sleep in monitoring loop interrupted");
|
||||||
|
// if the application is to be killed when client times out;
|
||||||
|
// then set needForceKill to true
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get application report for the appId we are interested in
|
// Get application report for the appId we are interested in
|
||||||
|
@ -1139,22 +1147,20 @@ public class Client {
|
||||||
FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
|
FinalApplicationStatus dsStatus = report.getFinalApplicationStatus();
|
||||||
if (YarnApplicationState.FINISHED == state) {
|
if (YarnApplicationState.FINISHED == state) {
|
||||||
if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
|
if (FinalApplicationStatus.SUCCEEDED == dsStatus) {
|
||||||
LOG.info("Application has completed successfully. Breaking monitoring loop");
|
LOG.info("Application has completed successfully. "
|
||||||
return true;
|
+ "Breaking monitoring loop");
|
||||||
|
res = true;
|
||||||
|
} else {
|
||||||
|
LOG.info("Application did finished unsuccessfully. "
|
||||||
|
+ "YarnState={}, DSFinalStatus={}. Breaking monitoring loop",
|
||||||
|
state, dsStatus);
|
||||||
}
|
}
|
||||||
else {
|
break;
|
||||||
LOG.info("Application did finished unsuccessfully."
|
} else if (YarnApplicationState.KILLED == state
|
||||||
+ " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
|
|
||||||
+ ". Breaking monitoring loop");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (YarnApplicationState.KILLED == state
|
|
||||||
|| YarnApplicationState.FAILED == state) {
|
|| YarnApplicationState.FAILED == state) {
|
||||||
LOG.info("Application did not finish."
|
LOG.info("Application did not finish. YarnState={}, DSFinalStatus={}. "
|
||||||
+ " YarnState=" + state.toString() + ", DSFinalStatus=" + dsStatus.toString()
|
+ "Breaking monitoring loop", state, dsStatus);
|
||||||
+ ". Breaking monitoring loop");
|
break;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// The value equal or less than 0 means no timeout
|
// The value equal or less than 0 means no timeout
|
||||||
|
@ -1162,11 +1168,16 @@ public class Client {
|
||||||
&& System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
|
&& System.currentTimeMillis() > (clientStartTime + clientTimeout)) {
|
||||||
LOG.info("Reached client specified timeout for application. " +
|
LOG.info("Reached client specified timeout for application. " +
|
||||||
"Killing application");
|
"Killing application");
|
||||||
forceKillApplication(appId);
|
needForceKill = true;
|
||||||
return false;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (needForceKill) {
|
||||||
|
forceKillApplication(appId);
|
||||||
|
}
|
||||||
|
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -107,6 +107,7 @@ import org.junit.Before;
|
||||||
import org.junit.Rule;
|
import org.junit.Rule;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.rules.TemporaryFolder;
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
import org.junit.rules.TestName;
|
||||||
import org.junit.rules.Timeout;
|
import org.junit.rules.Timeout;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -139,6 +140,13 @@ public class TestDistributedShell {
|
||||||
@Rule
|
@Rule
|
||||||
public TemporaryFolder tmpFolder = new TemporaryFolder();
|
public TemporaryFolder tmpFolder = new TemporaryFolder();
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public TestName name = new TestName();
|
||||||
|
|
||||||
|
private String generateAppName() {
|
||||||
|
return name.getMethodName().replaceFirst("test", "");
|
||||||
|
}
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws Exception {
|
public void setup() throws Exception {
|
||||||
setupInternal(NUM_NMS, timelineVersionWatcher.getTimelineVersion(),
|
setupInternal(NUM_NMS, timelineVersionWatcher.getTimelineVersion(),
|
||||||
|
@ -738,6 +746,8 @@ public class TestDistributedShell {
|
||||||
@Test
|
@Test
|
||||||
public void testDSRestartWithPreviousRunningContainers() throws Exception {
|
public void testDSRestartWithPreviousRunningContainers() throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -773,6 +783,8 @@ public class TestDistributedShell {
|
||||||
@Test
|
@Test
|
||||||
public void testDSAttemptFailuresValidityIntervalSucess() throws Exception {
|
public void testDSAttemptFailuresValidityIntervalSucess() throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -811,6 +823,8 @@ public class TestDistributedShell {
|
||||||
@Test
|
@Test
|
||||||
public void testDSAttemptFailuresValidityIntervalFailed() throws Exception {
|
public void testDSAttemptFailuresValidityIntervalFailed() throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -858,6 +872,8 @@ public class TestDistributedShell {
|
||||||
fileWriter.write("log4j.rootLogger=debug,stdout");
|
fileWriter.write("log4j.rootLogger=debug,stdout");
|
||||||
fileWriter.close();
|
fileWriter.close();
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -907,6 +923,8 @@ public class TestDistributedShell {
|
||||||
public void testSpecifyingLogAggregationContext() throws Exception {
|
public void testSpecifyingLogAggregationContext() throws Exception {
|
||||||
String regex = ".*(foo|bar)\\d";
|
String regex = ".*(foo|bar)\\d";
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--shell_command",
|
"--shell_command",
|
||||||
|
@ -929,6 +947,8 @@ public class TestDistributedShell {
|
||||||
public void testDSShellWithCommands() throws Exception {
|
public void testDSShellWithCommands() throws Exception {
|
||||||
|
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -961,6 +981,8 @@ public class TestDistributedShell {
|
||||||
@Test
|
@Test
|
||||||
public void testDSShellWithMultipleArgs() throws Exception {
|
public void testDSShellWithMultipleArgs() throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1011,6 +1033,8 @@ public class TestDistributedShell {
|
||||||
fileWriter.close();
|
fileWriter.close();
|
||||||
System.out.println(customShellScript.getAbsolutePath());
|
System.out.println(customShellScript.getAbsolutePath());
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1056,6 +1080,8 @@ public class TestDistributedShell {
|
||||||
LOG.info("Initializing DS Client with no jar file");
|
LOG.info("Initializing DS Client with no jar file");
|
||||||
try {
|
try {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
"2",
|
"2",
|
||||||
"--shell_command",
|
"--shell_command",
|
||||||
|
@ -1264,6 +1290,8 @@ public class TestDistributedShell {
|
||||||
@Test
|
@Test
|
||||||
public void testContainerLaunchFailureHandling() throws Exception {
|
public void testContainerLaunchFailureHandling() throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1292,6 +1320,8 @@ public class TestDistributedShell {
|
||||||
@Test
|
@Test
|
||||||
public void testDebugFlag() throws Exception {
|
public void testDebugFlag() throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1389,14 +1419,18 @@ public class TestDistributedShell {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDistributedShellResourceProfiles() throws Exception {
|
public void testDistributedShellResourceProfiles() throws Exception {
|
||||||
|
String appName = generateAppName();
|
||||||
String[][] args = {
|
String[][] args = {
|
||||||
{"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command",
|
{"--appname", appName + "-0", "--jar", APPMASTER_JAR,
|
||||||
|
"--num_containers", "1", "--shell_command",
|
||||||
Shell.WINDOWS ? "dir" : "ls", "--container_resource_profile",
|
Shell.WINDOWS ? "dir" : "ls", "--container_resource_profile",
|
||||||
"maximum" },
|
"maximum" },
|
||||||
{"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command",
|
{"--appname", appName + "-1", "--jar", APPMASTER_JAR,
|
||||||
|
"--num_containers", "1", "--shell_command",
|
||||||
Shell.WINDOWS ? "dir" : "ls", "--master_resource_profile",
|
Shell.WINDOWS ? "dir" : "ls", "--master_resource_profile",
|
||||||
"default" },
|
"default" },
|
||||||
{"--jar", APPMASTER_JAR, "--num_containers", "1", "--shell_command",
|
{"--appname", appName + "-2", "--jar", APPMASTER_JAR,
|
||||||
|
"--num_containers", "1", "--shell_command",
|
||||||
Shell.WINDOWS ? "dir" : "ls", "--master_resource_profile",
|
Shell.WINDOWS ? "dir" : "ls", "--master_resource_profile",
|
||||||
"default", "--container_resource_profile", "maximum" }
|
"default", "--container_resource_profile", "maximum" }
|
||||||
};
|
};
|
||||||
|
@ -1420,6 +1454,8 @@ public class TestDistributedShell {
|
||||||
Client client = new Client(new Configuration(yarnCluster.getConfig()));
|
Client client = new Client(new Configuration(yarnCluster.getConfig()));
|
||||||
try {
|
try {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1450,6 +1486,8 @@ public class TestDistributedShell {
|
||||||
Client client = new Client(new Configuration(yarnCluster.getConfig()));
|
Client client = new Client(new Configuration(yarnCluster.getConfig()));
|
||||||
try {
|
try {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1570,6 +1608,8 @@ public class TestDistributedShell {
|
||||||
}
|
}
|
||||||
|
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1651,6 +1691,8 @@ public class TestDistributedShell {
|
||||||
public void testDistributedShellAMResourcesWithIllegalArguments()
|
public void testDistributedShellAMResourcesWithIllegalArguments()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1668,6 +1710,8 @@ public class TestDistributedShell {
|
||||||
public void testDistributedShellAMResourcesWithMissingArgumentValue()
|
public void testDistributedShellAMResourcesWithMissingArgumentValue()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1684,6 +1728,8 @@ public class TestDistributedShell {
|
||||||
public void testDistributedShellAMResourcesWithUnknownResource()
|
public void testDistributedShellAMResourcesWithUnknownResource()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1702,6 +1748,8 @@ public class TestDistributedShell {
|
||||||
public void testDistributedShellNonExistentQueue()
|
public void testDistributedShellNonExistentQueue()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1720,6 +1768,8 @@ public class TestDistributedShell {
|
||||||
public void testDistributedShellWithSingleFileLocalization()
|
public void testDistributedShellWithSingleFileLocalization()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1741,6 +1791,8 @@ public class TestDistributedShell {
|
||||||
public void testDistributedShellWithMultiFileLocalization()
|
public void testDistributedShellWithMultiFileLocalization()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1762,6 +1814,8 @@ public class TestDistributedShell {
|
||||||
public void testDistributedShellWithNonExistentFileLocalization()
|
public void testDistributedShellWithNonExistentFileLocalization()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
|
@ -1785,14 +1839,14 @@ public class TestDistributedShell {
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String appName = "DistributedShellCleanup";
|
String appName = "DistributedShellCleanup";
|
||||||
String[] args = {
|
String[] args = {
|
||||||
|
"--appname",
|
||||||
|
generateAppName(),
|
||||||
"--jar",
|
"--jar",
|
||||||
APPMASTER_JAR,
|
APPMASTER_JAR,
|
||||||
"--num_containers",
|
"--num_containers",
|
||||||
"1",
|
"1",
|
||||||
"--shell_command",
|
"--shell_command",
|
||||||
Shell.WINDOWS ? "dir" : "ls",
|
Shell.WINDOWS ? "dir" : "ls"
|
||||||
"--appname",
|
|
||||||
appName
|
|
||||||
};
|
};
|
||||||
Configuration config = new Configuration(yarnCluster.getConfig());
|
Configuration config = new Configuration(yarnCluster.getConfig());
|
||||||
Client client = new Client(config);
|
Client client = new Client(config);
|
||||||
|
|
Loading…
Reference in New Issue