YARN-10119. Option to reset AM failure count for YARN Service

Contributed by kyungwan nam.
This commit is contained in:
Prabhu Joseph 2020-02-20 20:33:59 +05:30 committed by Prabhu Joseph
parent 4af2556b48
commit 181e6d033e
3 changed files with 50 additions and 0 deletions

View File

@ -1000,6 +1000,10 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
// Limit on AM attempts for this application, read from the
// AM_RESTART_MAX key ("yarn.service.am-restart.max-attempts") with
// DEFAULT_AM_RESTART_MAX as the fallback.
submissionContext.setMaxAppAttempts(YarnServiceConf
.getInt(YarnServiceConf.AM_RESTART_MAX, DEFAULT_AM_RESTART_MAX, app
.getConfiguration(), conf));
// Pass the AM failure validity interval through to the submission context,
// read from AM_FAILURES_VALIDITY_INTERVAL
// ("yarn.service.am-failure.validity-interval-ms") with
// DEFAULT_AM_FAILURES_VALIDITY_INTERVAL as the fallback.
// NOTE(review): presumably attempt failures older than this interval stop
// counting toward the max-attempts limit - confirm against the
// ApplicationSubmissionContext documentation.
submissionContext.setAttemptFailuresValidityInterval(YarnServiceConf
.getLong(YarnServiceConf.AM_FAILURES_VALIDITY_INTERVAL,
DEFAULT_AM_FAILURES_VALIDITY_INTERVAL, app.getConfiguration(),
conf));
setLogAggregationContext(app, conf, submissionContext);

View File

@ -44,6 +44,9 @@ public class YarnServiceConf {
// Key and default for the maximum number of AM attempts of a service.
public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
public static final int DEFAULT_AM_RESTART_MAX = 20;
// Key for the AM failure validity interval; the "-ms" suffix indicates the
// value is expressed in milliseconds.
public static final String AM_FAILURES_VALIDITY_INTERVAL =
"yarn.service.am-failure.validity-interval-ms";
// NOTE(review): -1 presumably leaves the validity interval disabled, so that
// every AM failure counts toward the attempt limit - confirm against the
// ApplicationSubmissionContext documentation.
public static final long DEFAULT_AM_FAILURES_VALIDITY_INTERVAL = -1;
// Key and default for the AM's memory resource.
// NOTE(review): unit is presumably MB - confirm.
public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";
public static final long DEFAULT_KEY_AM_RESOURCE_MEM = 1024;

View File

@ -938,4 +938,47 @@ public class TestYarnNativeServices extends ServiceTestUtils {
Assert.assertEquals("Restarted service state should be STABLE",
ServiceState.STABLE, service.getState());
}
/**
 * Checks that a service returns to STABLE after its AM is shut down twice
 * when the two shutdowns are separated by more than the configured
 * AM failure validity interval.
 *
 * The service is created with AM_RESTART_MAX set to 2 and
 * AM_FAILURES_VALIDITY_INTERVAL set to 1000 ms; the test waits 2000 ms
 * between the two shutdowns.
 * NOTE(review): the intent appears to be that the first failure has expired
 * by the time of the second one, so the attempt limit is not reached -
 * confirm against the RM's handling of the validity interval.
 *
 * @throws Exception if cluster setup, service creation or any client call
 *     fails
 */
@Test(timeout = 200000)
public void testAMFailureValidity() throws Exception {
  setupInternal(NUM_NMS);
  ServiceClient client = createClient(getConf());
  Service exampleApp = new Service();
  exampleApp.setName("example-app");
  exampleApp.setVersion("v1");
  exampleApp.addComponent(createComponent("compa", 2, "sleep 1000"));
  Configuration serviceConfig = new Configuration();
  serviceConfig.setProperty(AM_RESTART_MAX, "2");
  serviceConfig.setProperty(AM_FAILURES_VALIDITY_INTERVAL, "1000");
  exampleApp.setConfiguration(serviceConfig);
  client.actionCreate(exampleApp);
  waitForServiceToBeStable(client, exampleApp);
  Service appStatus1 = client.getStatus(exampleApp.getName());
  ApplicationId exampleAppId = ApplicationId.fromString(appStatus1.getId());
  YarnClient yarnClient = createYarnClient(getConf());

  // kill AM1
  signalAMGracefulShutdown(yarnClient, exampleAppId);
  waitForServiceToBeStable(client, exampleApp);
  Assert.assertEquals(
      "Service state after the first AM shutdown should be STABLE",
      ServiceState.STABLE,
      client.getStatus(exampleApp.getName()).getState());

  // kill AM2 after 'yarn.service.am-failure.validity-interval-ms'
  // (sleep 2000 ms, twice the 1000 ms interval configured above)
  Thread.sleep(2000);
  signalAMGracefulShutdown(yarnClient, exampleAppId);
  waitForServiceToBeStable(client, exampleApp);
  Assert.assertEquals(
      "Service state after the second AM shutdown should be STABLE",
      ServiceState.STABLE,
      client.getStatus(exampleApp.getName()).getState());
}

/**
 * Sends GRACEFUL_SHUTDOWN to the AM container of the application's current
 * attempt.
 *
 * @param yarnClient client used to query the application and signal the
 *     container
 * @param appId id of the application whose current AM is signalled
 * @throws Exception if looking up the attempt or signalling the container
 *     fails
 */
private static void signalAMGracefulShutdown(YarnClient yarnClient,
    ApplicationId appId) throws Exception {
  // Re-read the report on every call: the current attempt id changes once
  // a new AM attempt has been started.
  ApplicationReport applicationReport =
      yarnClient.getApplicationReport(appId);
  ApplicationAttemptReport attemptReport =
      yarnClient.getApplicationAttemptReport(
          applicationReport.getCurrentApplicationAttemptId());
  yarnClient.signalToContainer(attemptReport.getAMContainerId(),
      SignalContainerCommand.GRACEFUL_SHUTDOWN);
}
}