YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into

DistributedShell. Contributed by Xuan Gong
This commit is contained in:
XuanGong 2014-09-16 10:58:18 -07:00
parent 9f6891d9ef
commit 8e5d6713cf
4 changed files with 154 additions and 1 deletions

View File

@ -229,6 +229,9 @@ Release 2.6.0 - UNRELEASED
YARN-2547. Cross Origin Filter throws UnsupportedOperationException upon
destroy (Mit Desai via jeagles)
YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into
DistributedShell (xgong)
OPTIMIZATIONS
BUG FIXES

View File

@ -75,7 +75,6 @@
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.Records;
/**
* Client for Distributed Shell application submission to YARN.
@ -163,6 +162,8 @@ public class Client {
// flag to indicate whether to keep containers across application attempts.
private boolean keepContainers = false;
private long attemptFailuresValidityInterval = -1;
// Debug flag
boolean debugFlag = false;
@ -248,6 +249,12 @@ public Client(Configuration conf) throws Exception {
" If the flag is true, running containers will not be killed when" +
" application attempt fails and these containers will be retrieved by" +
" the new application attempt ");
opts.addOption("attempt_failures_validity_interval", true,
"when attempt_failures_validity_interval in milliseconds is set to > 0," +
"the failure number will not take failures which happen out of " +
"the validityInterval into failure count. " +
"If failure count reaches to maxAppAttempts, " +
"the application will be failed.");
opts.addOption("debug", false, "Dump out debug information");
opts.addOption("help", false, "Print usage");
@ -372,6 +379,10 @@ public boolean init(String[] args) throws ParseException {
clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000"));
attemptFailuresValidityInterval =
Long.parseLong(cliParser.getOptionValue(
"attempt_failures_validity_interval", "-1"));
log4jPropFile = cliParser.getOptionValue("log_properties", "");
return true;
@ -456,6 +467,11 @@ public boolean run() throws IOException, YarnException {
appContext.setKeepContainersAcrossApplicationAttempts(keepContainers);
appContext.setApplicationName(appName);
if (attemptFailuresValidityInterval >= 0) {
appContext
.setAttemptFailuresValidityInterval(attemptFailuresValidityInterval);
}
// set local resources for the application master
// local files or archives as needed
// In this scenario, the jar file for the application master is part of the local resources

View File

@ -0,0 +1,58 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.applications.distributedshell;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
public class TestDSSleepingAppMaster extends ApplicationMaster{
private static final Log LOG = LogFactory.getLog(TestDSSleepingAppMaster.class);
private static final long SLEEP_TIME = 5000;
public static void main(String[] args) {
boolean result = false;
try {
TestDSSleepingAppMaster appMaster = new TestDSSleepingAppMaster();
boolean doRun = appMaster.init(args);
if (!doRun) {
System.exit(0);
}
appMaster.run();
if (appMaster.appAttemptID.getAttemptId() <= 2) {
try {
// sleep some time
Thread.sleep(SLEEP_TIME);
} catch (InterruptedException e) {}
// fail the first am.
System.exit(100);
}
result = appMaster.finish();
} catch (Throwable t) {
System.exit(1);
}
if (result) {
LOG.info("Application Master completed successfully. exiting");
System.exit(0);
} else {
LOG.info("Application Master failed. exiting");
System.exit(2);
}
}
}

View File

@ -308,6 +308,82 @@ public void testDSRestartWithPreviousRunningContainers() throws Exception {
Assert.assertTrue(result);
}
/*
* The sleeping period in TestDSSleepingAppMaster is set as 5 seconds.
* Set attempt_failures_validity_interval as 2.5 seconds. It will check
* how many attempt failures for previous 2.5 seconds.
* The application is expected to be successful.
*/
@Test(timeout=90000)
public void testDSAttemptFailuresValidityIntervalSucess() throws Exception {
String[] args = {
"--jar",
APPMASTER_JAR,
"--num_containers",
"1",
"--shell_command",
"sleep 8",
"--master_memory",
"512",
"--container_memory",
"128",
"--attempt_failures_validity_interval",
"2500"
};
LOG.info("Initializing DS Client");
Configuration conf = yarnCluster.getConfig();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
Client client = new Client(TestDSSleepingAppMaster.class.getName(),
new Configuration(conf));
client.init(args);
LOG.info("Running DS Client");
boolean result = client.run();
LOG.info("Client run completed. Result=" + result);
// application should succeed
Assert.assertTrue(result);
}
/*
* The sleeping period in TestDSSleepingAppMaster is set as 5 seconds.
* Set attempt_failures_validity_interval as 15 seconds. It will check
* how many attempt failure for previous 15 seconds.
* The application is expected to be fail.
*/
@Test(timeout=90000)
public void testDSAttemptFailuresValidityIntervalFailed() throws Exception {
String[] args = {
"--jar",
APPMASTER_JAR,
"--num_containers",
"1",
"--shell_command",
"sleep 8",
"--master_memory",
"512",
"--container_memory",
"128",
"--attempt_failures_validity_interval",
"15000"
};
LOG.info("Initializing DS Client");
Configuration conf = yarnCluster.getConfig();
conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2);
Client client = new Client(TestDSSleepingAppMaster.class.getName(),
new Configuration(conf));
client.init(args);
LOG.info("Running DS Client");
boolean result = client.run();
LOG.info("Client run completed. Result=" + result);
// application should be failed
Assert.assertFalse(result);
}
@Test(timeout=90000)
public void testDSShellWithCustomLogPropertyFile() throws Exception {
final File basedir =