YARN-4927. TestRMHA#testTransitionedToActiveRefreshFail fails with FairScheduler. (Bibin A Chundatt via kasha)

(cherry picked from commit ff95fd547b)
This commit is contained in:
Karthik Kambatla 2016-04-09 10:31:02 -07:00
parent 9acfde3a03
commit 94a88ae87b
2 changed files with 35 additions and 19 deletions

View File

@ -682,7 +682,11 @@ public class AdminService extends CompositeService implements
return conf; return conf;
} }
private void refreshAll() throws ServiceFailedException { /*
* Visibility could be private; it is made default (package-private) for tests.
*/
@VisibleForTesting
void refreshAll() throws ServiceFailedException {
try { try {
refreshQueues(RefreshQueuesRequest.newInstance()); refreshQueues(RefreshQueuesRequest.newInstance());
refreshNodes(RefreshNodesRequest.newInstance(DecommissionType.NORMAL)); refreshNodes(RefreshNodesRequest.newInstance(DecommissionType.NORMAL));

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
import org.apache.hadoop.ha.HealthCheckFailedException; import org.apache.hadoop.ha.HealthCheckFailedException;
import org.apache.hadoop.ha.ServiceFailedException;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.AccessControlException;
@ -54,7 +55,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
import org.codehaus.jettison.json.JSONException; import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject; import org.codehaus.jettison.json.JSONObject;
import org.junit.Assert; import org.junit.Assert;
@ -584,19 +584,28 @@ public class TestRMHA {
assertEquals(0, rm.getRMContext().getRMApps().size()); assertEquals(0, rm.getRMContext().getRMApps().size());
} }
@Test(timeout = 90000) @Test(timeout = 9000000)
public void testTransitionedToActiveRefreshFail() throws Exception { public void testTransitionedToActiveRefreshFail() throws Exception {
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false); configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
YarnConfiguration conf = new YarnConfiguration(configuration);
configuration = new CapacitySchedulerConfiguration(conf);
rm = new MockRM(configuration) { rm = new MockRM(configuration) {
@Override @Override
protected AdminService createAdminService() { protected AdminService createAdminService() {
return new AdminService(this, getRMContext()) { return new AdminService(this, getRMContext()) {
int counter = 0;
@Override @Override
protected void setConfig(Configuration conf) { protected void setConfig(Configuration conf) {
super.setConfig(configuration); super.setConfig(configuration);
} }
@Override
protected void refreshAll() throws ServiceFailedException {
if (counter == 0) {
counter++;
throw new ServiceFailedException("Simulate RefreshFail");
} else {
super.refreshAll();
}
}
}; };
} }
@ -611,23 +620,26 @@ public class TestRMHA {
final StateChangeRequestInfo requestInfo = final StateChangeRequestInfo requestInfo =
new StateChangeRequestInfo( new StateChangeRequestInfo(
HAServiceProtocol.RequestSource.REQUEST_BY_USER); HAServiceProtocol.RequestSource.REQUEST_BY_USER);
configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
rm.adminService.transitionToStandby(requestInfo);
assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
configuration.set("yarn.scheduler.capacity.root.default.capacity", "200");
try {
rm.adminService.transitionToActive(requestInfo);
} catch (Exception e) {
assertTrue("Error on refreshAll during transistion to Active".contains(e
.getMessage()));
}
FailFastDispatcher dispatcher = FailFastDispatcher dispatcher =
((FailFastDispatcher) rm.rmContext.getDispatcher()); ((FailFastDispatcher) rm.rmContext.getDispatcher());
// Verify the transition to standby via transitionToStandby
rm.adminService.transitionToStandby(requestInfo);
assertEquals("Fatal Event should be 0", 0, dispatcher.getEventCount());
assertEquals("HA state should be in standBy State", HAServiceState.STANDBY,
rm.getRMContext().getHAServiceState());
try {
// Verify refreshAll call failure and check fail Event is dispatched
rm.adminService.transitionToActive(requestInfo);
Assert.fail("Transistion to Active should have failed for refreshAll()");
} catch (Exception e) {
assertTrue("Service fail Exception expected",
e instanceof ServiceFailedException);
}
// Since refreshAll failed, we expect a fatal event to be sent.
// When the fatal event is sent, the RM will shut down.
dispatcher.await(); dispatcher.await();
assertEquals(1, dispatcher.getEventCount()); assertEquals("Fatal Event to be received", 1, dispatcher.getEventCount());
// Making correct conf and check the state // Check of refreshAll success HA can be active
configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
rm.adminService.transitionToActive(requestInfo); rm.adminService.transitionToActive(requestInfo);
assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState()); assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
rm.adminService.transitionToStandby(requestInfo); rm.adminService.transitionToStandby(requestInfo);