YARN-1734. Fixed ResourceManager to update the configurations when it transits from standby to active mode so as to assimilate any changes that happened while it was in standby mode. Contributed by Xuan Gong.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1571539 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ae29d9ee04
commit
ad70f26b1f
|
@ -348,6 +348,10 @@ Release 2.4.0 - UNRELEASED
|
|||
re-registration after a RESYNC and thus avoid hanging. (Rohith Sharma via
|
||||
vinodkv)
|
||||
|
||||
YARN-1734. Fixed ResourceManager to update the configurations when it
|
||||
transits from standby to active mode so as to assimilate any changes that
|
||||
happened while it was in standby mode. (Xuan Gong via vinodkv)
|
||||
|
||||
Release 2.3.1 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -250,10 +250,20 @@ public class AdminService extends CompositeService implements
|
|||
@Override
|
||||
public synchronized void transitionToActive(
|
||||
HAServiceProtocol.StateChangeRequestInfo reqInfo) throws IOException {
|
||||
// call refreshAdminAcls before HA state transition
|
||||
// for the case that adminAcls have been updated in previous active RM
|
||||
try {
|
||||
refreshAdminAcls(false);
|
||||
} catch (YarnException ex) {
|
||||
throw new ServiceFailedException("Can not execute refreshAdminAcls", ex);
|
||||
}
|
||||
|
||||
UserGroupInformation user = checkAccess("transitionToActive");
|
||||
checkHaStateChange(reqInfo);
|
||||
try {
|
||||
rm.transitionToActive();
|
||||
// call all refresh*s for active RM to get the updated configurations.
|
||||
refreshAll();
|
||||
RMAuditLogger.logSuccess(user.getShortUserName(),
|
||||
"transitionToActive", "RMHAProtocolService");
|
||||
} catch (Exception e) {
|
||||
|
@ -268,6 +278,13 @@ public class AdminService extends CompositeService implements
|
|||
@Override
|
||||
public synchronized void transitionToStandby(
|
||||
HAServiceProtocol.StateChangeRequestInfo reqInfo) throws IOException {
|
||||
// call refreshAdminAcls before HA state transition
|
||||
// for the case that adminAcls have been updated in previous active RM
|
||||
try {
|
||||
refreshAdminAcls(false);
|
||||
} catch (YarnException ex) {
|
||||
throw new ServiceFailedException("Can not execute refreshAdminAcls", ex);
|
||||
}
|
||||
UserGroupInformation user = checkAccess("transitionToStandby");
|
||||
checkHaStateChange(reqInfo);
|
||||
try {
|
||||
|
@ -406,10 +423,15 @@ public class AdminService extends CompositeService implements
|
|||
@Override
|
||||
public RefreshAdminAclsResponse refreshAdminAcls(
|
||||
RefreshAdminAclsRequest request) throws YarnException, IOException {
|
||||
return refreshAdminAcls(true);
|
||||
}
|
||||
|
||||
private RefreshAdminAclsResponse refreshAdminAcls(boolean checkRMHAState)
|
||||
throws YarnException, IOException {
|
||||
String argName = "refreshAdminAcls";
|
||||
UserGroupInformation user = checkAcls(argName);
|
||||
|
||||
if (!isRMActive()) {
|
||||
|
||||
if (checkRMHAState && !isRMActive()) {
|
||||
RMAuditLogger.logFailure(user.getShortUserName(), argName,
|
||||
adminAcl.toString(), "AdminService",
|
||||
"ResourceManager is not active. Can not refresh user-groups.");
|
||||
|
@ -521,6 +543,24 @@ public class AdminService extends CompositeService implements
|
|||
return conf;
|
||||
}
|
||||
|
||||
private void refreshAll() throws ServiceFailedException {
|
||||
try {
|
||||
refreshQueues(RefreshQueuesRequest.newInstance());
|
||||
refreshNodes(RefreshNodesRequest.newInstance());
|
||||
refreshSuperUserGroupsConfiguration(
|
||||
RefreshSuperUserGroupsConfigurationRequest.newInstance());
|
||||
refreshUserToGroupsMappings(
|
||||
RefreshUserToGroupsMappingsRequest.newInstance());
|
||||
if (getConfig().getBoolean(
|
||||
CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION,
|
||||
false)) {
|
||||
refreshServiceAcls(RefreshServiceAclsRequest.newInstance());
|
||||
}
|
||||
} catch (Exception ex) {
|
||||
throw new ServiceFailedException(ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public AccessControlList getAccessControlList() {
|
||||
return this.adminAcl;
|
||||
|
|
|
@ -34,12 +34,16 @@ import org.apache.hadoop.fs.CommonConfigurationKeys;
|
|||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
||||
import org.apache.hadoop.security.GroupMappingServiceProvider;
|
||||
import org.apache.hadoop.security.Groups;
|
||||
import org.apache.hadoop.security.UserGroupInformation;
|
||||
import org.apache.hadoop.security.authorize.AccessControlList;
|
||||
import org.apache.hadoop.security.authorize.ProxyUsers;
|
||||
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
|
||||
import org.apache.hadoop.yarn.conf.HAUtil;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnException;
|
||||
import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest;
|
||||
|
@ -518,6 +522,94 @@ public class TestRMAdminService {
|
|||
Assert.assertTrue(excludeHosts.contains("0.0.0.0:123"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRMHAWithFileSystemBasedConfiguration() throws IOException,
|
||||
YarnException {
|
||||
StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
|
||||
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
|
||||
configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS,
|
||||
"org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider");
|
||||
configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
|
||||
configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
|
||||
configuration.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2");
|
||||
int base = 100;
|
||||
for (String confKey : YarnConfiguration
|
||||
.getServiceAddressConfKeys(configuration)) {
|
||||
configuration.set(HAUtil.addSuffix(confKey, "rm1"), "0.0.0.0:"
|
||||
+ (base + 20));
|
||||
configuration.set(HAUtil.addSuffix(confKey, "rm2"), "0.0.0.0:"
|
||||
+ (base + 40));
|
||||
base = base * 2;
|
||||
}
|
||||
Configuration conf1 = new Configuration(configuration);
|
||||
conf1.set(YarnConfiguration.RM_HA_ID, "rm1");
|
||||
Configuration conf2 = new Configuration(configuration);
|
||||
conf2.set(YarnConfiguration.RM_HA_ID, "rm2");
|
||||
|
||||
// upload default configurations
|
||||
uploadDefaultConfiguration();
|
||||
|
||||
MockRM rm1 = null;
|
||||
MockRM rm2 = null;
|
||||
try {
|
||||
rm1 = new MockRM(conf1);
|
||||
rm1.init(conf1);
|
||||
rm1.start();
|
||||
Assert.assertTrue(rm1.getRMContext().getHAServiceState()
|
||||
== HAServiceState.STANDBY);
|
||||
|
||||
rm2 = new MockRM(conf2);
|
||||
rm2.init(conf1);
|
||||
rm2.start();
|
||||
Assert.assertTrue(rm2.getRMContext().getHAServiceState()
|
||||
== HAServiceState.STANDBY);
|
||||
|
||||
rm1.adminService.transitionToActive(requestInfo);
|
||||
Assert.assertTrue(rm1.getRMContext().getHAServiceState()
|
||||
== HAServiceState.ACTIVE);
|
||||
|
||||
CapacitySchedulerConfiguration csConf =
|
||||
new CapacitySchedulerConfiguration();
|
||||
csConf.set("yarn.scheduler.capacity.maximum-applications", "5000");
|
||||
uploadConfiguration(csConf, "capacity-scheduler.xml");
|
||||
|
||||
rm1.adminService.refreshQueues(RefreshQueuesRequest.newInstance());
|
||||
|
||||
int maxApps =
|
||||
((CapacityScheduler) rm1.getRMContext().getScheduler())
|
||||
.getConfiguration().getMaximumSystemApplications();
|
||||
Assert.assertEquals(maxApps, 5000);
|
||||
|
||||
// Before failover happens, the maxApps is
|
||||
// still the default value on the standby rm : rm2
|
||||
int maxAppsBeforeFailOver =
|
||||
((CapacityScheduler) rm2.getRMContext().getScheduler())
|
||||
.getConfiguration().getMaximumSystemApplications();
|
||||
Assert.assertEquals(maxAppsBeforeFailOver, 10000);
|
||||
|
||||
// Do the failover
|
||||
rm1.adminService.transitionToStandby(requestInfo);
|
||||
rm2.adminService.transitionToActive(requestInfo);
|
||||
Assert.assertTrue(rm1.getRMContext().getHAServiceState()
|
||||
== HAServiceState.STANDBY);
|
||||
Assert.assertTrue(rm2.getRMContext().getHAServiceState()
|
||||
== HAServiceState.ACTIVE);
|
||||
|
||||
int maxAppsAfter =
|
||||
((CapacityScheduler) rm2.getRMContext().getScheduler())
|
||||
.getConfiguration().getMaximumSystemApplications();
|
||||
|
||||
Assert.assertEquals(maxAppsAfter, 5000);
|
||||
} finally {
|
||||
if (rm1 != null) {
|
||||
rm1.stop();
|
||||
}
|
||||
if (rm2 != null) {
|
||||
rm2.stop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private String writeConfigurationXML(Configuration conf, String confXMLName)
|
||||
throws IOException {
|
||||
DataOutputStream output = null;
|
||||
|
|
Loading…
Reference in New Issue