From 181e6d033ea764012b5d52757bafcf19713fd20a Mon Sep 17 00:00:00 2001 From: Prabhu Joseph Date: Thu, 20 Feb 2020 20:33:59 +0530 Subject: [PATCH] YARN-10119. Option to reset AM failure count for YARN Service Contributed by kyungwan nam. --- .../yarn/service/client/ServiceClient.java | 4 ++ .../yarn/service/conf/YarnServiceConf.java | 3 ++ .../yarn/service/TestYarnNativeServices.java | 43 +++++++++++++++++++ 3 files changed, 50 insertions(+) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java index 46bfa7a4564..b3ac7bbe748 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java @@ -1000,6 +1000,10 @@ ApplicationId submitApp(Service app) throws IOException, YarnException { submissionContext.setMaxAppAttempts(YarnServiceConf .getInt(YarnServiceConf.AM_RESTART_MAX, DEFAULT_AM_RESTART_MAX, app .getConfiguration(), conf)); + submissionContext.setAttemptFailuresValidityInterval(YarnServiceConf + .getLong(YarnServiceConf.AM_FAILURES_VALIDITY_INTERVAL, + DEFAULT_AM_FAILURES_VALIDITY_INTERVAL, app.getConfiguration(), + conf)); setLogAggregationContext(app, conf, submissionContext); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java index 58fe70b4f9b..86c4de2ef89 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/conf/YarnServiceConf.java @@ -44,6 +44,9 @@ public class YarnServiceConf { public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts"; public static final int DEFAULT_AM_RESTART_MAX = 20; + public static final String AM_FAILURES_VALIDITY_INTERVAL = + "yarn.service.am-failure.validity-interval-ms"; + public static final long DEFAULT_AM_FAILURES_VALIDITY_INTERVAL = -1; public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory"; public static final long DEFAULT_KEY_AM_RESOURCE_MEM = 1024; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java index b33972ed291..4ede8cfbb93 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/TestYarnNativeServices.java @@ -938,4 +938,47 @@ public void testRestartServiceForNonExistingInRM() throws Exception { Assert.assertEquals("Restarted service state should be STABLE", ServiceState.STABLE, service.getState()); } + + @Test(timeout = 200000) + public void testAMFailureValidity() throws Exception { + setupInternal(NUM_NMS); + ServiceClient client = createClient(getConf()); + Service exampleApp = new Service(); + exampleApp.setName("example-app"); + exampleApp.setVersion("v1"); + exampleApp.addComponent(createComponent("compa", 2, "sleep 1000")); + Configuration serviceConfig = new Configuration(); + serviceConfig.setProperty(AM_RESTART_MAX, "2"); + serviceConfig.setProperty(AM_FAILURES_VALIDITY_INTERVAL, "1000"); + exampleApp.setConfiguration(serviceConfig); + client.actionCreate(exampleApp); + waitForServiceToBeStable(client, exampleApp); + + Service appStatus1 = client.getStatus(exampleApp.getName()); + ApplicationId exampleAppId = ApplicationId.fromString(appStatus1.getId()); + YarnClient yarnClient = createYarnClient(getConf()); + + // kill AM1 + ApplicationReport applicationReport = yarnClient.getApplicationReport( + exampleAppId); + ApplicationAttemptReport attemptReport = yarnClient + .getApplicationAttemptReport(applicationReport + .getCurrentApplicationAttemptId()); + yarnClient.signalToContainer(attemptReport.getAMContainerId(), + SignalContainerCommand.GRACEFUL_SHUTDOWN); + waitForServiceToBeStable(client, exampleApp); + Assert.assertEquals(ServiceState.STABLE, client.getStatus( + exampleApp.getName()).getState()); + + // kill AM2 after 'yarn.service.am-failure.validity-interval-ms' + Thread.sleep(2000); + applicationReport = yarnClient.getApplicationReport(exampleAppId); + attemptReport = yarnClient.getApplicationAttemptReport(applicationReport + .getCurrentApplicationAttemptId()); + yarnClient.signalToContainer(attemptReport.getAMContainerId(), + SignalContainerCommand.GRACEFUL_SHUTDOWN); + waitForServiceToBeStable(client, exampleApp); + Assert.assertEquals(ServiceState.STABLE, client.getStatus( + exampleApp.getName()).getState()); + } }