YARN-4927. TestRMHA#testTransitionedToActiveRefreshFail fails with FairScheduler. (Bibin A Chundatt via kasha)
(cherry picked from commit ff95fd547b
)
This commit is contained in:
parent
9acfde3a03
commit
94a88ae87b
|
@ -682,7 +682,11 @@ public class AdminService extends CompositeService implements
|
||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void refreshAll() throws ServiceFailedException {
|
/*
|
||||||
|
* Visibility could be private for test its made as default
|
||||||
|
*/
|
||||||
|
@VisibleForTesting
|
||||||
|
void refreshAll() throws ServiceFailedException {
|
||||||
try {
|
try {
|
||||||
refreshQueues(RefreshQueuesRequest.newInstance());
|
refreshQueues(RefreshQueuesRequest.newInstance());
|
||||||
refreshNodes(RefreshNodesRequest.newInstance(DecommissionType.NORMAL));
|
refreshNodes(RefreshNodesRequest.newInstance(DecommissionType.NORMAL));
|
||||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
||||||
import org.apache.hadoop.ha.HealthCheckFailedException;
|
import org.apache.hadoop.ha.HealthCheckFailedException;
|
||||||
|
import org.apache.hadoop.ha.ServiceFailedException;
|
||||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.security.AccessControlException;
|
import org.apache.hadoop.security.AccessControlException;
|
||||||
|
@ -54,7 +55,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration;
|
|
||||||
import org.codehaus.jettison.json.JSONException;
|
import org.codehaus.jettison.json.JSONException;
|
||||||
import org.codehaus.jettison.json.JSONObject;
|
import org.codehaus.jettison.json.JSONObject;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
@ -584,19 +584,28 @@ public class TestRMHA {
|
||||||
assertEquals(0, rm.getRMContext().getRMApps().size());
|
assertEquals(0, rm.getRMContext().getRMApps().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout = 90000)
|
@Test(timeout = 9000000)
|
||||||
public void testTransitionedToActiveRefreshFail() throws Exception {
|
public void testTransitionedToActiveRefreshFail() throws Exception {
|
||||||
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
|
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
|
||||||
YarnConfiguration conf = new YarnConfiguration(configuration);
|
|
||||||
configuration = new CapacitySchedulerConfiguration(conf);
|
|
||||||
rm = new MockRM(configuration) {
|
rm = new MockRM(configuration) {
|
||||||
@Override
|
@Override
|
||||||
protected AdminService createAdminService() {
|
protected AdminService createAdminService() {
|
||||||
return new AdminService(this, getRMContext()) {
|
return new AdminService(this, getRMContext()) {
|
||||||
|
int counter = 0;
|
||||||
@Override
|
@Override
|
||||||
protected void setConfig(Configuration conf) {
|
protected void setConfig(Configuration conf) {
|
||||||
super.setConfig(configuration);
|
super.setConfig(configuration);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void refreshAll() throws ServiceFailedException {
|
||||||
|
if (counter == 0) {
|
||||||
|
counter++;
|
||||||
|
throw new ServiceFailedException("Simulate RefreshFail");
|
||||||
|
} else {
|
||||||
|
super.refreshAll();
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -611,23 +620,26 @@ public class TestRMHA {
|
||||||
final StateChangeRequestInfo requestInfo =
|
final StateChangeRequestInfo requestInfo =
|
||||||
new StateChangeRequestInfo(
|
new StateChangeRequestInfo(
|
||||||
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
|
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
|
||||||
|
|
||||||
configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
|
|
||||||
rm.adminService.transitionToStandby(requestInfo);
|
|
||||||
assertEquals(HAServiceState.STANDBY, rm.getRMContext().getHAServiceState());
|
|
||||||
configuration.set("yarn.scheduler.capacity.root.default.capacity", "200");
|
|
||||||
try {
|
|
||||||
rm.adminService.transitionToActive(requestInfo);
|
|
||||||
} catch (Exception e) {
|
|
||||||
assertTrue("Error on refreshAll during transistion to Active".contains(e
|
|
||||||
.getMessage()));
|
|
||||||
}
|
|
||||||
FailFastDispatcher dispatcher =
|
FailFastDispatcher dispatcher =
|
||||||
((FailFastDispatcher) rm.rmContext.getDispatcher());
|
((FailFastDispatcher) rm.rmContext.getDispatcher());
|
||||||
|
// Verify transistion to transitionToStandby
|
||||||
|
rm.adminService.transitionToStandby(requestInfo);
|
||||||
|
assertEquals("Fatal Event should be 0", 0, dispatcher.getEventCount());
|
||||||
|
assertEquals("HA state should be in standBy State", HAServiceState.STANDBY,
|
||||||
|
rm.getRMContext().getHAServiceState());
|
||||||
|
try {
|
||||||
|
// Verify refreshAll call failure and check fail Event is dispatched
|
||||||
|
rm.adminService.transitionToActive(requestInfo);
|
||||||
|
Assert.fail("Transistion to Active should have failed for refreshAll()");
|
||||||
|
} catch (Exception e) {
|
||||||
|
assertTrue("Service fail Exception expected",
|
||||||
|
e instanceof ServiceFailedException);
|
||||||
|
}
|
||||||
|
// Since refreshAll failed we are expecting fatal event to be send
|
||||||
|
// Then fatal event is send RM will shutdown
|
||||||
dispatcher.await();
|
dispatcher.await();
|
||||||
assertEquals(1, dispatcher.getEventCount());
|
assertEquals("Fatal Event to be received", 1, dispatcher.getEventCount());
|
||||||
// Making correct conf and check the state
|
// Check of refreshAll success HA can be active
|
||||||
configuration.set("yarn.scheduler.capacity.root.default.capacity", "100");
|
|
||||||
rm.adminService.transitionToActive(requestInfo);
|
rm.adminService.transitionToActive(requestInfo);
|
||||||
assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
|
assertEquals(HAServiceState.ACTIVE, rm.getRMContext().getHAServiceState());
|
||||||
rm.adminService.transitionToStandby(requestInfo);
|
rm.adminService.transitionToStandby(requestInfo);
|
||||||
|
|
Loading…
Reference in New Issue