YARN-62. Modified NodeManagers to prevent AMs from abusing container tokens for repetitive container launches. Contributed by Omkar Vinit Joshi.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1503986 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-07-17 04:24:44 +00:00
parent 1b6324265d
commit 8b9c1e68ab
6 changed files with 125 additions and 78 deletions

View File

@ -68,6 +68,9 @@ Release 2.1.1-beta - UNRELEASED
YARN-820. Fixed an invalid state transition in NodeManager caused by failing
resource localization. (Mayank Bansal via vinodkv)
YARN-62. Modified NodeManagers to prevent AMs from abusing container tokens for
repetitive container launches. (Omkar Vinit Joshi via vinodkv)
Release 2.1.0-beta - 2013-07-02
INCOMPATIBLE CHANGES

View File

@ -24,8 +24,11 @@ import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
@ -166,7 +169,6 @@ public class ContainerManagerImpl extends CompositeService implements
new ContainersMonitorImpl(exec, dispatcher, this.context);
addService(this.containersMonitor);
dispatcher.register(ContainerEventType.class,
new ContainerEventDispatcher());
dispatcher.register(ApplicationEventType.class,
@ -345,7 +347,7 @@ public class ContainerManagerImpl extends CompositeService implements
.append(ugi.getUserName()).append(" Found: ")
.append(nmTokenIdentifier.getApplicationAttemptId().toString());
} else if (!this.context.getContainerTokenSecretManager()
.isValidStartContainerRequest(containerId)) {
.isValidStartContainerRequest(containerTokenIdentifier)) {
// Is the container being relaunched? Or RPC layer let startCall with
// tokens generated off old-secret through?
unauthorized = true;

View File

@ -392,9 +392,6 @@ public class ApplicationImpl implements Application {
@Override
public void transition(ApplicationImpl app, ApplicationEvent event) {
// Inform the ContainerTokenSecretManager
app.context.getContainerTokenSecretManager().appFinished(app.appId);
// Inform the logService
app.dispatcher.getEventHandler().handle(
new LogHandlerAppFinishedEvent(app.appId));

View File

@ -18,17 +18,17 @@
package org.apache.hadoop.yarn.server.nodemanager.security;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
@ -48,14 +48,15 @@ public class NMContainerTokenSecretManager extends
.getLog(NMContainerTokenSecretManager.class);
private MasterKeyData previousMasterKey;
private final TreeMap<Long, List<ContainerId>> recentlyStartedContainerTracker;
private final Map<ApplicationId, ConcurrentMap<ContainerId, MasterKeyData>> oldMasterKeys;
private String nodeHostAddr;
public NMContainerTokenSecretManager(Configuration conf) {
super(conf);
this.oldMasterKeys =
new HashMap<ApplicationId, ConcurrentMap<ContainerId, MasterKeyData>>();
recentlyStartedContainerTracker =
new TreeMap<Long, List<ContainerId>>();
}
/**
@ -93,9 +94,6 @@ public class NMContainerTokenSecretManager extends
public synchronized byte[] retrievePassword(
ContainerTokenIdentifier identifier) throws SecretManager.InvalidToken {
int keyId = identifier.getMasterKeyId();
ContainerId containerId = identifier.getContainerID();
ApplicationId appId =
containerId.getApplicationAttemptId().getApplicationId();
MasterKeyData masterKeyToUse = null;
if (this.previousMasterKey != null
@ -107,19 +105,6 @@ public class NMContainerTokenSecretManager extends
// A container-launch has come in with a token generated off the current
// master-key
masterKeyToUse = super.currentMasterKey;
} else if (this.oldMasterKeys.containsKey(appId)
&& this.oldMasterKeys.get(appId).containsKey(containerId)) {
// This means one of the following happened:
// (1) a stopContainer() or a getStatus() happened for a container with
// token generated off a master-key that is neither current nor the
// previous one.
// (2) a container-relaunch has come in with a token generated off a
// master-key that is neither current nor the previous one.
// This basically lets stop and getStatus() calls with old-tokens to pass
// through without any issue, i.e. (1).
// Start-calls for repetitive launches (2) also pass through RPC here, but
// get thwarted at the app-layer as part of startContainer() call.
masterKeyToUse = this.oldMasterKeys.get(appId).get(containerId);
}
if (nodeHostAddr != null
@ -143,61 +128,64 @@ public class NMContainerTokenSecretManager extends
}
/**
* Container start has gone through. Store the corresponding keys so that
* stopContainer() and getContainerStatus() can be authenticated long after
* the container-start went through.
* Container start has gone through. We need to store the containerId in order
* to block future container start requests with the same container token. The
* containerId needs to be remembered until its container token expires.
*/
public synchronized void startContainerSuccessful(
ContainerTokenIdentifier tokenId) {
int keyId = tokenId.getMasterKeyId();
if (currentMasterKey.getMasterKey().getKeyId() == keyId) {
addKeyForContainerId(tokenId.getContainerID(), currentMasterKey);
} else if (previousMasterKey != null
&& previousMasterKey.getMasterKey().getKeyId() == keyId) {
addKeyForContainerId(tokenId.getContainerID(), previousMasterKey);
removeAnyContainerTokenIfExpired();
Long expTime = tokenId.getExpiryTimeStamp();
// We might have multiple containers with same expiration time.
if (!recentlyStartedContainerTracker.containsKey(expTime)) {
recentlyStartedContainerTracker
.put(expTime, new ArrayList<ContainerId>());
}
recentlyStartedContainerTracker.get(expTime).add(tokenId.getContainerID());
}
/**
 * Evicts every tracked entry whose expiry-time key is strictly earlier than
 * the current wall-clock time. Entries expiring at or after "now" are kept.
 */
protected synchronized void removeAnyContainerTokenIfExpired() {
  // The tracker is sorted by expiry time, so all expired entries form the
  // head of the map; clearing that view removes them from the backing map.
  final long now = System.currentTimeMillis();
  this.recentlyStartedContainerTracker.headMap(now).clear();
}
/**
* Ensure the startContainer call is not using an older cached key. Will
* return false once startContainerSuccessful is called. Does not check
* the actual key being current since that is verified by the security layer
* via retrievePassword.
* Container will be remembered based on expiration time of the container
* token used for starting the container. It is safe to use expiration time
* as there is one to many mapping between expiration time and containerId.
* @return true if the current token identifier is not present in cache.
*/
public synchronized boolean isValidStartContainerRequest(
ContainerId containerID) {
ApplicationId applicationId =
containerID.getApplicationAttemptId().getApplicationId();
return !this.oldMasterKeys.containsKey(applicationId)
|| !this.oldMasterKeys.get(applicationId).containsKey(containerID);
}
ContainerTokenIdentifier containerTokenIdentifier) {
private synchronized void addKeyForContainerId(ContainerId containerId,
MasterKeyData masterKeyData) {
if (containerId != null) {
ApplicationId appId =
containerId.getApplicationAttemptId().getApplicationId();
if (!this.oldMasterKeys.containsKey(appId)) {
this.oldMasterKeys.put(appId,
new ConcurrentHashMap<ContainerId, MasterKeyData>());
}
ConcurrentMap<ContainerId, MasterKeyData> containerIdToKeysMapForThisApp =
this.oldMasterKeys.get(appId);
containerIdToKeysMapForThisApp.put(containerId, masterKeyData);
removeAnyContainerTokenIfExpired();
Long expTime = containerTokenIdentifier.getExpiryTimeStamp();
List<ContainerId> containers =
this.recentlyStartedContainerTracker.get(expTime);
if (containers == null
|| !containers.contains(containerTokenIdentifier.getContainerID())) {
return true;
} else {
LOG.warn("Not adding key for null containerId");
return false;
}
}
/**
 * Forgets the master keys remembered for the given application.
 *
 * Master keys corresponding to containers are held on to until the app is
 * finished because of the multiple ways a container can finish; keeping them
 * avoids stopContainer calls seeing unnecessary authorization exceptions.
 *
 * @param appId application whose entry is removed from oldMasterKeys
 */
public synchronized void appFinished(ApplicationId appId) {
this.oldMasterKeys.remove(appId);
}
/**
 * Records the string form of the given node id as this node's host address
 * and logs the new value.
 *
 * @param nodeId node id whose {@code toString()} becomes the host address
 */
public synchronized void setNodeId(NodeId nodeId) {
  final String address = nodeId.toString();
  this.nodeHostAddr = address;
  LOG.info("Updating node address : " + address);
}
}
}

View File

@ -29,6 +29,7 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import junit.framework.Assert;
@ -265,9 +266,12 @@ public class TestApplication {
AuxServicesEventType.APPLICATION_STOP, wa.appId)));
wa.appResourcesCleanedup();
for ( Container container : wa.containers) {
for (Container container : wa.containers) {
ContainerTokenIdentifier identifier =
wa.getContainerTokenIdentifier(container.getContainerId());
waitForContainerTokenToExpire(identifier);
Assert.assertTrue(wa.context.getContainerTokenSecretManager()
.isValidStartContainerRequest(container.getContainerId()));
.isValidStartContainerRequest(identifier));
}
assertEquals(ApplicationState.FINISHED, wa.app.getApplicationState());
@ -277,6 +281,18 @@ public class TestApplication {
}
}
/**
 * Waits for the given container token to expire, polling once per second
 * for at most five attempts.
 *
 * @param identifier token identifier whose expiry timestamp is awaited
 * @return the identifier that was passed in
 */
protected ContainerTokenIdentifier waitForContainerTokenToExpire(
    ContainerTokenIdentifier identifier) {
  int attempts = 5;
  while (System.currentTimeMillis() < identifier.getExpiryTimeStamp()
      && attempts-- > 0) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      // Do not swallow the interrupt: restore the flag so callers can see
      // it, and stop waiting instead of sleeping again.
      Thread.currentThread().interrupt();
      break;
    }
  }
  return identifier;
}
@Test
@SuppressWarnings("unchecked")
public void testAppFinishedOnCompletedContainers() {
@ -306,8 +322,11 @@ public class TestApplication {
wa.appResourcesCleanedup();
for ( Container container : wa.containers) {
ContainerTokenIdentifier identifier =
wa.getContainerTokenIdentifier(container.getContainerId());
waitForContainerTokenToExpire(identifier);
Assert.assertTrue(wa.context.getContainerTokenSecretManager()
.isValidStartContainerRequest(container.getContainerId()));
.isValidStartContainerRequest(identifier));
}
assertEquals(ApplicationState.FINISHED, wa.app.getApplicationState());
} finally {
@ -440,7 +459,8 @@ public class TestApplication {
final String user;
final List<Container> containers;
final Context context;
final Map<ContainerId, ContainerTokenIdentifier> containerTokenIdentifierMap;
final ApplicationId appId;
final Application app;
@ -448,6 +468,8 @@ public class TestApplication {
Configuration conf = new Configuration();
dispatcher = new DrainDispatcher();
containerTokenIdentifierMap =
new HashMap<ContainerId, ContainerTokenIdentifier>();
dispatcher.init(conf);
localizerBus = mock(EventHandler.class);
@ -486,11 +508,15 @@ public class TestApplication {
Container container = createMockedContainer(this.appId, i);
containers.add(container);
long currentTime = System.currentTimeMillis();
ContainerTokenIdentifier identifier =
new ContainerTokenIdentifier(container.getContainerId(), "", "",
null, currentTime + 2000, masterKey.getKeyId(), currentTime);
containerTokenIdentifierMap
.put(identifier.getContainerID(), identifier);
context.getContainerTokenSecretManager().startContainerSuccessful(
new ContainerTokenIdentifier(container.getContainerId(), "",
"", null, currentTime + 1000, masterKey.getKeyId(), currentTime));
identifier);
Assert.assertFalse(context.getContainerTokenSecretManager()
.isValidStartContainerRequest(container.getContainerId()));
.isValidStartContainerRequest(identifier));
}
dispatcher.start();
@ -542,6 +568,11 @@ public class TestApplication {
ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP));
drainDispatcherEvents();
}
/**
 * Looks up the token identifier recorded for a container.
 *
 * @param containerId id of the container to look up
 * @return the identifier stored in containerTokenIdentifierMap for that id,
 *         or null when the map holds no entry for it
 */
public ContainerTokenIdentifier getContainerTokenIdentifier(
    ContainerId containerId) {
  final ContainerTokenIdentifier recorded =
      containerTokenIdentifierMap.get(containerId);
  return recorded;
}
}
private Container createMockedContainer(ApplicationId appId, int containerId) {

View File

@ -42,12 +42,12 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.Token;
import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
@ -211,12 +211,25 @@ public class TestContainerManagerSecurity {
Assert.assertTrue(testStartContainer(rpc, validAppAttemptId, validNode,
validContainerToken, invalidNMToken, true).contains(sb.toString()));
// using correct tokens. nmtoken for appattempt should get saved.
// using correct tokens. nmtoken for app attempt should get saved.
conf.setInt(YarnConfiguration.RM_CONTAINER_ALLOC_EXPIRY_INTERVAL_MS,
4 * 60 * 1000);
validContainerToken =
containerTokenSecretManager.createContainerToken(validContainerId,
validNode, user, r);
testStartContainer(rpc, validAppAttemptId, validNode, validContainerToken,
validNMToken, false);
Assert.assertTrue(nmTokenSecretManagerNM
.isAppAttemptNMTokenKeyPresent(validAppAttemptId));
// Now let's wait until the container finishes and is removed from the node manager.
waitForContainerToFinishOnNM(validContainerId);
sb = new StringBuilder("Attempt to relaunch the same container with id ");
sb.append(validContainerId);
Assert.assertTrue(testStartContainer(rpc, validAppAttemptId, validNode,
validContainerToken, validNMToken, true).contains(sb.toString()));
// Rolling over master key twice so that we can check whether older keys
// are used for authentication.
rollNMTokenMasterKey(nmTokenSecretManagerRM, nmTokenSecretManagerNM);
@ -233,6 +246,19 @@ public class TestContainerManagerSecurity {
}
/**
 * Polls the first NodeManager's context once per second until the given
 * container is no longer listed, giving up after four minutes, and then
 * asserts that the container is gone.
 *
 * @param containerId id of the container expected to disappear
 */
private void waitForContainerToFinishOnNM(ContainerId containerId) {
  Context nmContext = yarnCluster.getNodeManager(0).getNMContext();
  int interval = 4 * 60; // Max time for container token to expire.
  while ((interval-- > 0)
      && nmContext.getContainers().containsKey(containerId)) {
    try {
      Thread.sleep(1000);
    } catch (InterruptedException e) {
      // Preserve the interrupt for the caller and stop polling; the
      // assertion below still reports whether the container is gone.
      Thread.currentThread().interrupt();
      break;
    }
  }
  Assert.assertFalse(nmContext.getContainers().containsKey(containerId));
}
protected void waitForNMToReceiveNMTokenKey(
NMTokenSecretManagerInNM nmTokenSecretManagerNM, NodeManager nm)
throws InterruptedException {