From 8e5d6713cf16473d791c028cecc274fd2c7fd10b Mon Sep 17 00:00:00 2001 From: XuanGong Date: Tue, 16 Sep 2014 10:58:18 -0700 Subject: [PATCH] YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into DistributedShell. Contributed by Xuan Gong --- hadoop-yarn-project/CHANGES.txt | 3 + .../applications/distributedshell/Client.java | 18 ++++- .../TestDSSleepingAppMaster.java | 58 ++++++++++++++ .../TestDistributedShell.java | 76 +++++++++++++++++++ 4 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 66623ec6624..ec59cba0f5d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -229,6 +229,9 @@ Release 2.6.0 - UNRELEASED YARN-2547. Cross Origin Filter throws UnsupportedOperationException upon destroy (Mit Desai via jeagles) + YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into + DistributedShell (xgong) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java index a86b52132eb..f3ce64ce074 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/main/java/org/apache/hadoop/yarn/applications/distributedshell/Client.java @@ -75,7 +75,6 @@ import org.apache.hadoop.yarn.client.api.YarnClientApplication; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.util.ConverterUtils; -import org.apache.hadoop.yarn.util.Records; /** * Client for Distributed Shell application submission to YARN. @@ -163,6 +162,8 @@ public class Client { // flag to indicate whether to keep containers across application attempts. private boolean keepContainers = false; + private long attemptFailuresValidityInterval = -1; + // Debug flag boolean debugFlag = false; @@ -248,6 +249,12 @@ public class Client { " If the flag is true, running containers will not be killed when" + " application attempt fails and these containers will be retrieved by" + " the new application attempt "); + opts.addOption("attempt_failures_validity_interval", true, + "when attempt_failures_validity_interval in milliseconds is set to > 0," + + "the failure number will not take failures which happen out of " + + "the validityInterval into failure count. " + + "If failure count reaches to maxAppAttempts, " + + "the application will be failed."); opts.addOption("debug", false, "Dump out debug information"); opts.addOption("help", false, "Print usage"); @@ -372,6 +379,10 @@ public class Client { clientTimeout = Integer.parseInt(cliParser.getOptionValue("timeout", "600000")); + attemptFailuresValidityInterval = + Long.parseLong(cliParser.getOptionValue( + "attempt_failures_validity_interval", "-1")); + log4jPropFile = cliParser.getOptionValue("log_properties", ""); return true; @@ -456,6 +467,11 @@ public class Client { appContext.setKeepContainersAcrossApplicationAttempts(keepContainers); appContext.setApplicationName(appName); + if (attemptFailuresValidityInterval >= 0) { + appContext + .setAttemptFailuresValidityInterval(attemptFailuresValidityInterval); + } + // set local resources for the application master // local files or archives as needed // In this scenario, the jar file for the application master is part of the local resources diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java new file mode 100644 index 00000000000..3004b6954e3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDSSleepingAppMaster.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.applications.distributedshell; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +public class TestDSSleepingAppMaster extends ApplicationMaster{ + + private static final Log LOG = LogFactory.getLog(TestDSSleepingAppMaster.class); + private static final long SLEEP_TIME = 5000; + + public static void main(String[] args) { + boolean result = false; + try { + TestDSSleepingAppMaster appMaster = new TestDSSleepingAppMaster(); + boolean doRun = appMaster.init(args); + if (!doRun) { + System.exit(0); + } + appMaster.run(); + if (appMaster.appAttemptID.getAttemptId() <= 2) { + try { + // sleep some time + Thread.sleep(SLEEP_TIME); + } catch (InterruptedException e) {} + // fail the first am. + System.exit(100); + } + result = appMaster.finish(); + } catch (Throwable t) { + System.exit(1); + } + if (result) { + LOG.info("Application Master completed successfully. exiting"); + System.exit(0); + } else { + LOG.info("Application Master failed. exiting"); + System.exit(2); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java index d7a174516a2..6dff94c7684 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/src/test/java/org/apache/hadoop/yarn/applications/distributedshell/TestDistributedShell.java @@ -308,6 +308,82 @@ public class TestDistributedShell { Assert.assertTrue(result); } + /* + * The sleeping period in TestDSSleepingAppMaster is set as 5 seconds. + * Set attempt_failures_validity_interval as 2.5 seconds. It will check + * how many attempt failures for previous 2.5 seconds. + * The application is expected to be successful. + */ + @Test(timeout=90000) + public void testDSAttemptFailuresValidityIntervalSucess() throws Exception { + String[] args = { + "--jar", + APPMASTER_JAR, + "--num_containers", + "1", + "--shell_command", + "sleep 8", + "--master_memory", + "512", + "--container_memory", + "128", + "--attempt_failures_validity_interval", + "2500" + }; + + LOG.info("Initializing DS Client"); + Configuration conf = yarnCluster.getConfig(); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2); + Client client = new Client(TestDSSleepingAppMaster.class.getName(), + new Configuration(conf)); + + client.init(args); + LOG.info("Running DS Client"); + boolean result = client.run(); + + LOG.info("Client run completed. Result=" + result); + // application should succeed + Assert.assertTrue(result); + } + + /* + * The sleeping period in TestDSSleepingAppMaster is set as 5 seconds. + * Set attempt_failures_validity_interval as 15 seconds. It will check + * how many attempt failure for previous 15 seconds. + * The application is expected to be fail. + */ + @Test(timeout=90000) + public void testDSAttemptFailuresValidityIntervalFailed() throws Exception { + String[] args = { + "--jar", + APPMASTER_JAR, + "--num_containers", + "1", + "--shell_command", + "sleep 8", + "--master_memory", + "512", + "--container_memory", + "128", + "--attempt_failures_validity_interval", + "15000" + }; + + LOG.info("Initializing DS Client"); + Configuration conf = yarnCluster.getConfig(); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 2); + Client client = new Client(TestDSSleepingAppMaster.class.getName(), + new Configuration(conf)); + + client.init(args); + LOG.info("Running DS Client"); + boolean result = client.run(); + + LOG.info("Client run completed. Result=" + result); + // application should be failed + Assert.assertFalse(result); + } + @Test(timeout=90000) public void testDSShellWithCustomLogPropertyFile() throws Exception { final File basedir =