YARN-10119. Option to reset AM failure count for YARN Service
Contributed by kyungwan nam.
This commit is contained in:
parent
4af2556b48
commit
181e6d033e
|
@ -1000,6 +1000,10 @@ public class ServiceClient extends AppAdminClient implements SliderExitCodes,
|
||||||
submissionContext.setMaxAppAttempts(YarnServiceConf
|
submissionContext.setMaxAppAttempts(YarnServiceConf
|
||||||
.getInt(YarnServiceConf.AM_RESTART_MAX, DEFAULT_AM_RESTART_MAX, app
|
.getInt(YarnServiceConf.AM_RESTART_MAX, DEFAULT_AM_RESTART_MAX, app
|
||||||
.getConfiguration(), conf));
|
.getConfiguration(), conf));
|
||||||
|
submissionContext.setAttemptFailuresValidityInterval(YarnServiceConf
|
||||||
|
.getLong(YarnServiceConf.AM_FAILURES_VALIDITY_INTERVAL,
|
||||||
|
DEFAULT_AM_FAILURES_VALIDITY_INTERVAL, app.getConfiguration(),
|
||||||
|
conf));
|
||||||
|
|
||||||
setLogAggregationContext(app, conf, submissionContext);
|
setLogAggregationContext(app, conf, submissionContext);
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,9 @@ public class YarnServiceConf {
|
||||||
|
|
||||||
public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
|
public static final String AM_RESTART_MAX = "yarn.service.am-restart.max-attempts";
|
||||||
public static final int DEFAULT_AM_RESTART_MAX = 20;
|
public static final int DEFAULT_AM_RESTART_MAX = 20;
|
||||||
|
/**
 * Configuration key for the AM failures validity interval. ServiceClient
 * reads it as a long and passes the value to the application submission
 * context via setAttemptFailuresValidityInterval. The "-ms" suffix of the
 * key indicates the value is expressed in milliseconds.
 */
public static final String AM_FAILURES_VALIDITY_INTERVAL =
|
||||||
|
"yarn.service.am-failure.validity-interval-ms";
|
||||||
|
/**
 * Value used when {@link #AM_FAILURES_VALIDITY_INTERVAL} is not configured.
 * NOTE(review): presumably a non-positive interval means AM failures never
 * expire from the failure count - confirm against the YARN
 * ApplicationSubmissionContext documentation.
 */
public static final long DEFAULT_AM_FAILURES_VALIDITY_INTERVAL = -1;
|
||||||
public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";
|
public static final String AM_RESOURCE_MEM = "yarn.service.am-resource.memory";
|
||||||
public static final long DEFAULT_KEY_AM_RESOURCE_MEM = 1024;
|
public static final long DEFAULT_KEY_AM_RESOURCE_MEM = 1024;
|
||||||
|
|
||||||
|
|
|
@ -938,4 +938,47 @@ public class TestYarnNativeServices extends ServiceTestUtils {
|
||||||
Assert.assertEquals("Restarted service state should be STABLE",
|
Assert.assertEquals("Restarted service state should be STABLE",
|
||||||
ServiceState.STABLE, service.getState());
|
ServiceState.STABLE, service.getState());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout = 200000)
|
||||||
|
public void testAMFailureValidity() throws Exception {
|
||||||
|
setupInternal(NUM_NMS);
|
||||||
|
ServiceClient client = createClient(getConf());
|
||||||
|
Service exampleApp = new Service();
|
||||||
|
exampleApp.setName("example-app");
|
||||||
|
exampleApp.setVersion("v1");
|
||||||
|
exampleApp.addComponent(createComponent("compa", 2, "sleep 1000"));
|
||||||
|
Configuration serviceConfig = new Configuration();
|
||||||
|
serviceConfig.setProperty(AM_RESTART_MAX, "2");
|
||||||
|
serviceConfig.setProperty(AM_FAILURES_VALIDITY_INTERVAL, "1000");
|
||||||
|
exampleApp.setConfiguration(serviceConfig);
|
||||||
|
client.actionCreate(exampleApp);
|
||||||
|
waitForServiceToBeStable(client, exampleApp);
|
||||||
|
|
||||||
|
Service appStatus1 = client.getStatus(exampleApp.getName());
|
||||||
|
ApplicationId exampleAppId = ApplicationId.fromString(appStatus1.getId());
|
||||||
|
YarnClient yarnClient = createYarnClient(getConf());
|
||||||
|
|
||||||
|
// kill AM1
|
||||||
|
ApplicationReport applicationReport = yarnClient.getApplicationReport(
|
||||||
|
exampleAppId);
|
||||||
|
ApplicationAttemptReport attemptReport = yarnClient
|
||||||
|
.getApplicationAttemptReport(applicationReport
|
||||||
|
.getCurrentApplicationAttemptId());
|
||||||
|
yarnClient.signalToContainer(attemptReport.getAMContainerId(),
|
||||||
|
SignalContainerCommand.GRACEFUL_SHUTDOWN);
|
||||||
|
waitForServiceToBeStable(client, exampleApp);
|
||||||
|
Assert.assertEquals(ServiceState.STABLE, client.getStatus(
|
||||||
|
exampleApp.getName()).getState());
|
||||||
|
|
||||||
|
// kill AM2 after 'yarn.service.am-failure.validity-interval-ms'
|
||||||
|
Thread.sleep(2000);
|
||||||
|
applicationReport = yarnClient.getApplicationReport(exampleAppId);
|
||||||
|
attemptReport = yarnClient.getApplicationAttemptReport(applicationReport
|
||||||
|
.getCurrentApplicationAttemptId());
|
||||||
|
yarnClient.signalToContainer(attemptReport.getAMContainerId(),
|
||||||
|
SignalContainerCommand.GRACEFUL_SHUTDOWN);
|
||||||
|
waitForServiceToBeStable(client, exampleApp);
|
||||||
|
Assert.assertEquals(ServiceState.STABLE, client.getStatus(
|
||||||
|
exampleApp.getName()).getState());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue