YARN-4927. TestRMHA#testTransitionedToActiveRefreshFail fails with FairScheduler. (Bibin A Chundatt via kasha)

(cherry picked from commit ff95fd547b)
This commit is contained in:
Karthik Kambatla 2016-04-09 10:31:02 -07:00
parent 9acfde3a03
commit 94a88ae87b
2 changed files with 35 additions and 19 deletions

View File

@ -682,7 +682,11 @@ private synchronized Configuration getConfiguration(Configuration conf,
return conf;
}
private void refreshAll() throws ServiceFailedException {
/*
* Visibility could be private for test its made as default
*/
@VisibleForTesting
void refreshAll() throws ServiceFailedException {
try {
refreshQueues(RefreshQueuesRequest.newInstance());
refreshNodes(RefreshNodesRequest.newInstance(DecommissionType.NORMAL));

View File

@ -35,6 +35,7 @@
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException;
@ -54,7 +55,6 @@
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.junit.Assert;
@ -584,19 +584,28 @@ protected void serviceStart() throws Exception {
assertEquals(0, rm.getRMContext().getRMApps().size());
}
@Test(timeout = 90000)
@Test(timeout = 9000000)
public void testTransitionedToActiveRefreshFail() throws Exception {
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
YarnConfiguration conf = new YarnConfiguration(configuration);
configuration = new CapacitySchedulerConfiguration(conf);
rm = new MockRM(configuration) {
@Override
protected AdminService createAdminService() {
return new AdminService(this, getRMContext()) {
int counter = 0;
@Override
protected void setConfig(Configuration conf) {
super.setConfig(configuration);
}
@Override
protected void refreshAll() throws ServiceFailedException {
if (counter == 0) {
counter++;
throw new ServiceFailedException("Simulate RefreshFail");
} else {
super.refreshAll();
}
}
};
}
@ -611,23 +620,26 @@ protected Dispatcher createDispatcher() {
final StateChangeRequestInfo requestInfo =
new StateChangeRequestInfo(
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
rm.adminService.transitionToStandby(requestInfo);
assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
configuration.set("yarn.scheduler.capacity.root.default.capacity", "200");
try {
rm.adminService.transitionToActive(requestInfo);
} catch (Exception e) {
assertTrue("Error on refreshAll during transistion to Active".contains(e
.getMessage()));
}
FailFastDispatcher dispatcher =
((FailFastDispatcher) rm.rmContext.getDispatcher());
// Verify transistion to transitionToStandby
rm.adminService.transitionToStandby(requestInfo);
assertEquals("Fatal Event should be 0", 0, dispatcher.getEventCount());
assertEquals("HA state should be in standBy State", HAServiceState.STANDBY,
rm.getRMContext().getHAServiceState());
try {
// Verify refreshAll call failure and check fail Event is dispatched
rm.adminService.transitionToActive(requestInfo);
Assert.fail("Transistion to Active should have failed for refreshAll()");
} catch (Exception e) {
assertTrue("Service fail Exception expected",
e instanceof ServiceFailedException);
}
// Since refreshAll failed we are expecting fatal event to be send
// Then fatal event is send RM will shutdown
dispatcher.await();
assertEquals(1, dispatcher.getEventCount());
// Making correct conf and check the state
configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
assertEquals("Fatal Event to be received", 1, dispatcher.getEventCount());
// Check of refreshAll success HA can be active
rm.adminService.transitionToActive(requestInfo);
assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
rm.adminService.transitionToStandby(requestInfo);