From 7029fc356f234ee055ec3d42a140c4cde36c8a53 Mon Sep 17 00:00:00 2001 From: Jian He Date: Tue, 3 Nov 2015 15:33:13 -0800 Subject: [PATCH] YARN-4127. RM fail with noAuth error if switched from failover mode to non-failover mode. Contributed by Varun Saxena --- hadoop-yarn-project/CHANGES.txt | 3 + .../recovery/ZKRMStateStore.java | 46 +++++++----- .../recovery/TestZKRMStateStore.java | 70 +++++++++++++++++++ 3 files changed, 102 insertions(+), 17 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index ef2e58be2af..5d6cd8068f2 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -118,6 +118,9 @@ Release 2.7.2 - UNRELEASED YARN-3136. getTransferredContainers can be a bottleneck during AM registration. (Sunil G via jianhe) + YARN-4127. RM fail with noAuth error if switched from failover mode to non-failover + mode. (Varun Saxena via jianhe) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java index 543d89191b4..f12ada76b34 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java @@ -305,8 +305,9 @@ public class ZKRMStateStore extends RMStateStore { // ensure root dirs exist createRootDirRecursively(znodeWorkingPath); createRootDir(zkRootNodePath); + setRootNodeAcls(); + deleteFencingNodePath(); if (HAUtil.isHAEnabled(getConfig())){ - 
fence();
       verifyActiveStatusThread = new VerifyActiveStatusThread();
       verifyActiveStatusThread.start();
     }
@@ -352,31 +353,41 @@ public class ZKRMStateStore extends RMStateStore {
     LOG.debug(builder.toString());
   }
 
-  private synchronized void fence() throws Exception {
-    if (LOG.isTraceEnabled()) {
-      logRootNodeAcls("Before fencing\n");
-    }
-
-    new ZKAction<Void>() {
-      @Override
-      public Void run() throws KeeperException, InterruptedException {
-        zkClient.setACL(zkRootNodePath, zkRootNodeAcl, -1);
-        return null;
-      }
-    }.runWithRetries();
-
-    // delete fencingnodepath
+  private void deleteFencingNodePath() throws Exception {
     new ZKAction<Void>() {
       @Override
       public Void run() throws KeeperException, InterruptedException {
         try {
           zkClient.multi(Collections.singletonList(deleteFencingNodePathOp));
         } catch (KeeperException.NoNodeException nne) {
-          LOG.info("Fencing node " + fencingNodePath + " doesn't exist to delete");
+          LOG.info("Fencing node " + fencingNodePath +
+              " doesn't exist to delete");
         }
         return null;
       }
     }.runWithRetries();
+  }
+
+  private void setAcl(final String zkPath, final List<ACL> acl)
+      throws Exception {
+    new ZKAction<Void>() {
+      @Override
+      public Void run() throws KeeperException, InterruptedException {
+        zkClient.setACL(zkPath, acl, -1);
+        return null;
+      }
+    }.runWithRetries();
+  }
+
+  private void setRootNodeAcls() throws Exception {
+    if (LOG.isTraceEnabled()) {
+      logRootNodeAcls("Before fencing\n");
+    }
+    if (HAUtil.isHAEnabled(getConfig())) {
+      setAcl(zkRootNodePath, zkRootNodeAcl);
+    } else {
+      setAcl(zkRootNodePath, zkAcl);
+    }
 
     if (LOG.isTraceEnabled()) {
       logRootNodeAcls("After fencing\n");
@@ -1027,7 +1038,8 @@ public class ZKRMStateStore extends RMStateStore {
     }.runWithRetries();
   }
 
-  private List<ACL> getACLWithRetries(
+  @VisibleForTesting
+  List<ACL> getACLWithRetries(
       final String path, final Stat stat) throws Exception {
     return new ZKAction<List<ACL>>() {
       @Override
diff --git
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java index 019915c506a..ea66c145bc5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java @@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.records.Version; import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; import org.apache.hadoop.yarn.server.resourcemanager.RMZKUtils; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData; @@ -65,8 +66,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptS import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.ZooDefs.Perms; import org.apache.zookeeper.ZooKeeper; import org.apache.zookeeper.CreateMode; +import org.apache.zookeeper.data.ACL; import org.apache.zookeeper.data.Stat; import org.junit.Assert; import org.junit.Test; @@ -236,6 +239,73 @@ public class TestZKRMStateStore extends RMStateStoreTestBase { return conf; } + private static boolean 
verifyZKACL(String scheme, String id, int perm,
+      List<ACL> acls) {
+    // True if an ACL has this scheme, these perms and an id starting with id.
+    for (ACL acl : acls) {
+      if (acl.getId().getScheme().equals(scheme) && acl.getPerms() == perm &&
+          acl.getId().getId().startsWith(id)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * RM must start without a ZooKeeper NoAuth error across HA -> non-HA -> HA
+   * restarts, with the root znode ACLs reset to fit the mode on each start.
+   */
+  @Test
+  public void testZKRootPathAcls() throws Exception {
+    StateChangeRequestInfo req = new StateChangeRequestInfo(
+        HAServiceProtocol.RequestSource.REQUEST_BY_USER);
+    String rootPath =
+        YarnConfiguration.DEFAULT_ZK_RM_STATE_STORE_PARENT_PATH + "/" +
+        ZKRMStateStore.ROOT_ZNODE_NAME;
+
+    // Start RM with HA enabled
+    Configuration conf = createHARMConf("rm1,rm2", "rm1", 1234);
+    conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
+    ResourceManager rm = new MockRM(conf);
+    rm.start();
+    rm.getRMContext().getRMAdminService().transitionToActive(req);
+    Stat stat = new Stat();
+    List<ACL> acls = ((ZKRMStateStore)rm.getRMContext().getStateStore()).
+        getACLWithRetries(rootPath, stat);
+    assertEquals(2, acls.size());
+    // CREATE and DELETE permissions for root node based on RM ID
+    Assert.assertTrue(verifyZKACL(
+        "digest", "localhost", Perms.CREATE | Perms.DELETE, acls));
+    Assert.assertTrue(verifyZKACL(
+        "world", "anyone", Perms.ALL ^ (Perms.CREATE | Perms.DELETE), acls));
+    rm.close();
+
+    // Now start RM with HA disabled. NoAuth Exception should not be thrown.
+    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, false);
+    rm = new MockRM(conf);
+    rm.start();
+    rm.getRMContext().getRMAdminService().transitionToActive(req);
+    acls = ((ZKRMStateStore)rm.getRMContext().getStateStore()).
+        getACLWithRetries(rootPath, stat);
+    assertEquals(1, acls.size());
+    Assert.assertTrue(verifyZKACL("world", "anyone", Perms.ALL, acls));
+    rm.close();
+
+    // Start RM with HA enabled.
+    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
+    rm = new MockRM(conf);
+    rm.start();
+    rm.getRMContext().getRMAdminService().transitionToActive(req);
+    acls = ((ZKRMStateStore)rm.getRMContext().getStateStore()).
+        getACLWithRetries(rootPath, stat);
+    assertEquals(2, acls.size());
+    Assert.assertTrue(verifyZKACL(
+        "digest", "localhost", Perms.CREATE | Perms.DELETE, acls));
+    Assert.assertTrue(verifyZKACL(
+        "world", "anyone", Perms.ALL ^ (Perms.CREATE | Perms.DELETE), acls));
+    rm.close();
+  }
+
   @SuppressWarnings("unchecked")
   @Test
   public void testFencing() throws Exception {