diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 7d48a4fe12a..e34dbbf3c04 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -333,6 +333,10 @@ Release 2.4.0 - UNRELEASED re-registration after a RESYNC and thus avoid hanging. (Rohith Sharma via vinodkv) + YARN-1734. Fixed ResourceManager to update the configurations when it + transits from standby to active mode so as to assimilate any changes that + happened while it was in standby mode. (Xuan Gong via vinodkv) + Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 70845c775e4..c53d40f54a1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -250,10 +250,20 @@ public class AdminService extends CompositeService implements @Override public synchronized void transitionToActive( HAServiceProtocol.StateChangeRequestInfo reqInfo) throws IOException { + // call refreshAdminAcls before HA state transition + // for the case that adminAcls have been updated in previous active RM + try { + refreshAdminAcls(false); + } catch (YarnException ex) { + throw new ServiceFailedException("Can not execute refreshAdminAcls", ex); + } + UserGroupInformation user = checkAccess("transitionToActive"); checkHaStateChange(reqInfo); try { rm.transitionToActive(); + // call all refresh*s for active RM to get the updated configurations. + refreshAll(); RMAuditLogger.logSuccess(user.getShortUserName(), "transitionToActive", "RMHAProtocolService"); } catch (Exception e) { @@ -268,6 +278,13 @@ public class AdminService extends CompositeService implements @Override public synchronized void transitionToStandby( HAServiceProtocol.StateChangeRequestInfo reqInfo) throws IOException { + // call refreshAdminAcls before HA state transition + // for the case that adminAcls have been updated in previous active RM + try { + refreshAdminAcls(false); + } catch (YarnException ex) { + throw new ServiceFailedException("Can not execute refreshAdminAcls", ex); + } UserGroupInformation user = checkAccess("transitionToStandby"); checkHaStateChange(reqInfo); try { @@ -406,10 +423,15 @@ public class AdminService extends CompositeService implements @Override public RefreshAdminAclsResponse refreshAdminAcls( RefreshAdminAclsRequest request) throws YarnException, IOException { + return refreshAdminAcls(true); + } + + private RefreshAdminAclsResponse refreshAdminAcls(boolean checkRMHAState) + throws YarnException, IOException { String argName = "refreshAdminAcls"; UserGroupInformation user = checkAcls(argName); - - if (!isRMActive()) { + + if (checkRMHAState && !isRMActive()) { RMAuditLogger.logFailure(user.getShortUserName(), argName, adminAcl.toString(), "AdminService", "ResourceManager is not active. Can not refresh user-groups."); @@ -521,6 +543,24 @@ public class AdminService extends CompositeService implements return conf; } + private void refreshAll() throws ServiceFailedException { + try { + refreshQueues(RefreshQueuesRequest.newInstance()); + refreshNodes(RefreshNodesRequest.newInstance()); + refreshSuperUserGroupsConfiguration( + RefreshSuperUserGroupsConfigurationRequest.newInstance()); + refreshUserToGroupsMappings( + RefreshUserToGroupsMappingsRequest.newInstance()); + if (getConfig().getBoolean( + CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION, + false)) { + refreshServiceAcls(RefreshServiceAclsRequest.newInstance()); + } + } catch (Exception ex) { + throw new ServiceFailedException(ex.getMessage()); + } + } + @VisibleForTesting public AccessControlList getAccessControlList() { return this.adminAcl; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java index e67b81f36f2..60259cddbd5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMAdminService.java @@ -34,12 +34,16 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo; import org.apache.hadoop.security.GroupMappingServiceProvider; import org.apache.hadoop.security.Groups; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.security.authorize.ServiceAuthorizationManager; +import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest; @@ -518,6 +522,94 @@ public class TestRMAdminService { Assert.assertTrue(excludeHosts.contains("0.0.0.0:123")); } + @Test + public void testRMHAWithFileSystemBasedConfiguration() throws IOException, + YarnException { + StateChangeRequestInfo requestInfo = new StateChangeRequestInfo( + HAServiceProtocol.RequestSource.REQUEST_BY_USER); + configuration.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS, + "org.apache.hadoop.yarn.FileSystemBasedConfigurationProvider"); + configuration.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false); + configuration.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2"); + int base = 100; + for (String confKey : YarnConfiguration + .getServiceAddressConfKeys(configuration)) { + configuration.set(HAUtil.addSuffix(confKey, "rm1"), "0.0.0.0:" + + (base + 20)); + configuration.set(HAUtil.addSuffix(confKey, "rm2"), "0.0.0.0:" + + (base + 40)); + base = base * 2; + } + Configuration conf1 = new Configuration(configuration); + conf1.set(YarnConfiguration.RM_HA_ID, "rm1"); + Configuration conf2 = new Configuration(configuration); + conf2.set(YarnConfiguration.RM_HA_ID, "rm2"); + + // upload default configurations + uploadDefaultConfiguration(); + + MockRM rm1 = null; + MockRM rm2 = null; + try { + rm1 = new MockRM(conf1); + rm1.init(conf1); + rm1.start(); + Assert.assertTrue(rm1.getRMContext().getHAServiceState() + == HAServiceState.STANDBY); + + rm2 = new MockRM(conf2); + rm2.init(conf1); + rm2.start(); + Assert.assertTrue(rm2.getRMContext().getHAServiceState() + == HAServiceState.STANDBY); + + rm1.adminService.transitionToActive(requestInfo); + Assert.assertTrue(rm1.getRMContext().getHAServiceState() + == HAServiceState.ACTIVE); + + CapacitySchedulerConfiguration csConf = + new CapacitySchedulerConfiguration(); + csConf.set("yarn.scheduler.capacity.maximum-applications", "5000"); + uploadConfiguration(csConf, "capacity-scheduler.xml"); + + rm1.adminService.refreshQueues(RefreshQueuesRequest.newInstance()); + + int maxApps = + ((CapacityScheduler) rm1.getRMContext().getScheduler()) + .getConfiguration().getMaximumSystemApplications(); + Assert.assertEquals(maxApps, 5000); + + // Before failover happens, the maxApps is + // still the default value on the standby rm : rm2 + int maxAppsBeforeFailOver = + ((CapacityScheduler) rm2.getRMContext().getScheduler()) + .getConfiguration().getMaximumSystemApplications(); + Assert.assertEquals(maxAppsBeforeFailOver, 10000); + + // Do the failover + rm1.adminService.transitionToStandby(requestInfo); + rm2.adminService.transitionToActive(requestInfo); + Assert.assertTrue(rm1.getRMContext().getHAServiceState() + == HAServiceState.STANDBY); + Assert.assertTrue(rm2.getRMContext().getHAServiceState() + == HAServiceState.ACTIVE); + + int maxAppsAfter = + ((CapacityScheduler) rm2.getRMContext().getScheduler()) + .getConfiguration().getMaximumSystemApplications(); + + Assert.assertEquals(maxAppsAfter, 5000); + } finally { + if (rm1 != null) { + rm1.stop(); + } + if (rm2 != null) { + rm2.stop(); + } + } + } + private String writeConfigurationXML(Configuration conf, String confXMLName) throws IOException { DataOutputStream output = null;