YARN-4127. RM fail with noAuth error if switched from failover mode to non-failover mode. Contributed by Varun Saxena

This commit is contained in:
Jian He 2015-11-03 15:33:13 -08:00
parent 09348c4bce
commit 7029fc356f
3 changed files with 102 additions and 17 deletions

View File

@ -118,6 +118,9 @@ Release 2.7.2 - UNRELEASED
YARN-3136. getTransferredContainers can be a bottleneck during AM registration.
(Sunil G via jianhe)
YARN-4127. RM fail with noAuth error if switched from failover mode to non-failover
mode. (Varun Saxena via jianhe)
Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES

View File

@ -305,8 +305,9 @@ public class ZKRMStateStore extends RMStateStore {
// ensure root dirs exist
createRootDirRecursively(znodeWorkingPath);
createRootDir(zkRootNodePath);
setRootNodeAcls();
deleteFencingNodePath();
if (HAUtil.isHAEnabled(getConfig())){
fence();
verifyActiveStatusThread = new VerifyActiveStatusThread();
verifyActiveStatusThread.start();
}
@ -352,31 +353,41 @@ public class ZKRMStateStore extends RMStateStore {
LOG.debug(builder.toString());
}
private synchronized void fence() throws Exception {
if (LOG.isTraceEnabled()) {
logRootNodeAcls("Before fencing\n");
}
new ZKAction<Void>() {
@Override
public Void run() throws KeeperException, InterruptedException {
zkClient.setACL(zkRootNodePath, zkRootNodeAcl, -1);
return null;
}
}.runWithRetries();
// delete fencingnodepath
/**
 * Deletes the fencing znode by submitting {@code deleteFencingNodePathOp}
 * as a single-operation multi request, executed through
 * {@link ZKAction#runWithRetries()}.
 *
 * A missing fencing node is not treated as an error: the resulting
 * {@link KeeperException.NoNodeException} is caught and reported once at
 * INFO level.
 *
 * @throws Exception if the ZooKeeper action fails for any other reason
 */
private void deleteFencingNodePath() throws Exception {
  new ZKAction<Void>() {
    @Override
    public Void run() throws KeeperException, InterruptedException {
      try {
        zkClient.multi(Collections.singletonList(deleteFencingNodePathOp));
      } catch (KeeperException.NoNodeException nne) {
        // Nothing to clean up; log the condition exactly once.
        LOG.info("Fencing node " + fencingNodePath +
            " doesn't exist to delete");
      }
      return null;
    }
  }.runWithRetries();
}
/**
 * Applies the supplied ACL list to the znode at {@code zkPath}, running the
 * update through {@link ZKAction#runWithRetries()}.
 *
 * @param zkPath path of the znode whose ACLs are replaced
 * @param acl ACL entries to set on the znode
 * @throws Exception if the ZooKeeper action does not complete successfully
 */
private void setAcl(final String zkPath, final List<ACL> acl)
    throws Exception {
  final ZKAction<Void> applyAcl = new ZKAction<Void>() {
    @Override
    public Void run() throws KeeperException, InterruptedException {
      // An ACL version of -1 asks ZooKeeper to skip the version check.
      zkClient.setACL(zkPath, acl, -1);
      return null;
    }
  };
  applyAcl.runWithRetries();
}
private void setRootNodeAcls() throws Exception {
if (LOG.isTraceEnabled()) {
logRootNodeAcls("Before fencing\n");
}
if (HAUtil.isHAEnabled(getConfig())) {
setAcl(zkRootNodePath, zkRootNodeAcl);
} else {
setAcl(zkRootNodePath, zkAcl);
}
if (LOG.isTraceEnabled()) {
logRootNodeAcls("After fencing\n");
@ -1027,7 +1038,8 @@ public class ZKRMStateStore extends RMStateStore {
}.runWithRetries();
}
private List<ACL> getACLWithRetries(
@VisibleForTesting
List<ACL> getACLWithRetries(
final String path, final Stat stat) throws Exception {
return new ZKAction<List<ACL>>() {
@Override

View File

@ -53,6 +53,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.records.Version;
import org.apache.hadoop.yarn.server.records.impl.pb.VersionPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.MockRM;
import org.apache.hadoop.yarn.server.resourcemanager.RMZKUtils;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
@ -65,8 +66,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptS
import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Perms;
import org.apache.zookeeper.ZooKeeper;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.data.ACL;
import org.apache.zookeeper.data.Stat;
import org.junit.Assert;
import org.junit.Test;
@ -236,6 +239,73 @@ public class TestZKRMStateStore extends RMStateStoreTestBase {
return conf;
}
/**
 * Reports whether {@code acls} contains an entry that matches all three
 * criteria. Note the parameter order: the id prefix comes first, the scheme
 * second.
 *
 * @param id prefix the entry's id must start with
 * @param scheme scheme the entry's id must equal exactly
 * @param perm permission bits the entry must carry exactly
 * @param acls ACL entries to search
 * @return {@code true} if a matching entry exists, {@code false} otherwise
 */
private static boolean verifyZKACL(String id, String scheme, int perm,
    List<ACL> acls) {
  for (ACL entry : acls) {
    if (!entry.getId().getScheme().equals(scheme)) {
      continue;
    }
    if (!entry.getId().getId().startsWith(id)) {
      continue;
    }
    if (entry.getPerms() == perm) {
      return true;
    }
  }
  return false;
}
/**
 * Test if RM can successfully start in HA disabled mode if it was previously
 * running in HA enabled mode. And then start it in HA mode after running it
 * with HA disabled. NoAuth Exception should not be sent by zookeeper and RM
 * should start successfully.
 *
 * After each start the ACLs on the state-store root znode are read back and
 * asserted: two entries (RM digest with CREATE|DELETE, world:anyone with
 * everything else) when HA is enabled, and a single world:anyone entry with
 * all permissions when HA is disabled.
 */
@Test
public void testZKRootPathAcls() throws Exception {
  StateChangeRequestInfo req = new StateChangeRequestInfo(
      HAServiceProtocol.RequestSource.REQUEST_BY_USER);
  String rootPath =
      YarnConfiguration.DEFAULT_ZK_RM_STATE_STORE_PARENT_PATH + "/" +
      ZKRMStateStore.ROOT_ZNODE_NAME;

  // Start RM with HA enabled
  Configuration conf = createHARMConf("rm1,rm2", "rm1", 1234);
  conf.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false);
  assertHARootNodeAcls(readRootNodeAclsFromFreshRM(conf, req, rootPath));

  // Now start RM with HA disabled. NoAuth Exception should not be thrown.
  conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, false);
  List<ACL> acls = readRootNodeAclsFromFreshRM(conf, req, rootPath);
  Assert.assertEquals(1, acls.size());
  Assert.assertTrue("world:anyone should hold all permissions: " + acls,
      verifyZKACL("anyone", "world", Perms.ALL, acls));

  // Start RM with HA enabled.
  conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
  assertHARootNodeAcls(readRootNodeAclsFromFreshRM(conf, req, rootPath));
}

/**
 * Starts a new MockRM with {@code conf}, transitions it to active, reads the
 * ACLs of {@code rootPath} from its ZKRMStateStore, and always closes the RM
 * again so a failure cannot leak a running RM into later tests.
 */
private List<ACL> readRootNodeAclsFromFreshRM(Configuration conf,
    StateChangeRequestInfo req, String rootPath) throws Exception {
  ResourceManager rm = new MockRM(conf);
  try {
    rm.start();
    rm.getRMContext().getRMAdminService().transitionToActive(req);
    return ((ZKRMStateStore) rm.getRMContext().getStateStore())
        .getACLWithRetries(rootPath, new Stat());
  } finally {
    rm.close();
  }
}

/**
 * Asserts the root-node ACLs expected while HA is enabled. verifyZKACL takes
 * the id prefix first and the scheme second.
 */
private static void assertHARootNodeAcls(List<ACL> acls) {
  Assert.assertEquals(2, acls.size());
  // CREATE and DELETE permissions for root node based on RM ID
  // NOTE(review): assumes the digest id starts with the RM host
  // ("localhost" in createHARMConf) -- confirm against the store's ACL setup.
  Assert.assertTrue("missing RM digest ACL with CREATE|DELETE: " + acls,
      verifyZKACL("localhost", "digest", Perms.CREATE | Perms.DELETE, acls));
  Assert.assertTrue("missing world:anyone ACL without CREATE|DELETE: " + acls,
      verifyZKACL("anyone", "world",
          Perms.ALL ^ (Perms.CREATE | Perms.DELETE), acls));
}
@SuppressWarnings("unchecked")
@Test
public void testFencing() throws Exception {