YARN-60. Fixed a bug in ResourceManager which causes all NMs to get NPEs and thus causes all containers to be rejected. Contributed by Vinod Kumar Vavilapalli.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1379550 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2012-08-31 19:11:05 +00:00
parent 38d003a6db
commit 45a8e8c5a4
20 changed files with 242 additions and 89 deletions

View File

@ -74,3 +74,5 @@ Release 0.23.3 - Unreleased
YARN-63. RMNodeImpl is missing valid transitions from the UNHEALTHY state YARN-63. RMNodeImpl is missing valid transitions from the UNHEALTHY state
(Jason Lowe via bobby) (Jason Lowe via bobby)
YARN-60. Fixed a bug in ResourceManager which causes all NMs to get NPEs and
thus causes all containers to be rejected. (vinodkv)

View File

@ -18,11 +18,14 @@
package org.apache.hadoop.yarn.server.api.protocolrecords; package org.apache.hadoop.yarn.server.api.protocolrecords;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.api.records.NodeStatus;
public interface NodeHeartbeatRequest { public interface NodeHeartbeatRequest {
public abstract NodeStatus getNodeStatus();
NodeStatus getNodeStatus();
public abstract void setNodeStatus(NodeStatus status); void setNodeStatus(NodeStatus status);
MasterKey getLastKnownMasterKey();
void setLastKnownMasterKey(MasterKey secretKey);
} }

View File

@ -18,24 +18,25 @@
package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
import org.apache.hadoop.yarn.api.records.ProtoBase; import org.apache.hadoop.yarn.api.records.ProtoBase;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProto; import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatRequestProtoOrBuilder;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
import org.apache.hadoop.yarn.server.api.records.impl.pb.NodeStatusPBImpl; import org.apache.hadoop.yarn.server.api.records.impl.pb.NodeStatusPBImpl;
public class NodeHeartbeatRequestPBImpl extends
ProtoBase<NodeHeartbeatRequestProto> implements NodeHeartbeatRequest {
public class NodeHeartbeatRequestPBImpl extends ProtoBase<NodeHeartbeatRequestProto> implements NodeHeartbeatRequest {
NodeHeartbeatRequestProto proto = NodeHeartbeatRequestProto.getDefaultInstance(); NodeHeartbeatRequestProto proto = NodeHeartbeatRequestProto.getDefaultInstance();
NodeHeartbeatRequestProto.Builder builder = null; NodeHeartbeatRequestProto.Builder builder = null;
boolean viaProto = false; boolean viaProto = false;
private NodeStatus nodeStatus = null; private NodeStatus nodeStatus = null;
private MasterKey lastKnownMasterKey = null;
public NodeHeartbeatRequestPBImpl() { public NodeHeartbeatRequestPBImpl() {
builder = NodeHeartbeatRequestProto.newBuilder(); builder = NodeHeartbeatRequestProto.newBuilder();
@ -57,6 +58,10 @@ public class NodeHeartbeatRequestPBImpl extends ProtoBase<NodeHeartbeatRequestPr
if (this.nodeStatus != null) { if (this.nodeStatus != null) {
builder.setNodeStatus(convertToProtoFormat(this.nodeStatus)); builder.setNodeStatus(convertToProtoFormat(this.nodeStatus));
} }
if (this.lastKnownMasterKey != null) {
builder
.setLastKnownMasterKey(convertToProtoFormat(this.lastKnownMasterKey));
}
} }
private void mergeLocalToProto() { private void mergeLocalToProto() {
@ -96,6 +101,27 @@ public class NodeHeartbeatRequestPBImpl extends ProtoBase<NodeHeartbeatRequestPr
this.nodeStatus = nodeStatus; this.nodeStatus = nodeStatus;
} }
@Override
public MasterKey getLastKnownMasterKey() {
NodeHeartbeatRequestProtoOrBuilder p = viaProto ? proto : builder;
if (this.lastKnownMasterKey != null) {
return this.lastKnownMasterKey;
}
if (!p.hasLastKnownMasterKey()) {
return null;
}
this.lastKnownMasterKey = convertFromProtoFormat(p.getLastKnownMasterKey());
return this.lastKnownMasterKey;
}
@Override
public void setLastKnownMasterKey(MasterKey masterKey) {
maybeInitBuilder();
if (masterKey == null)
builder.clearLastKnownMasterKey();
this.lastKnownMasterKey = masterKey;
}
private NodeStatusPBImpl convertFromProtoFormat(NodeStatusProto p) { private NodeStatusPBImpl convertFromProtoFormat(NodeStatusProto p) {
return new NodeStatusPBImpl(p); return new NodeStatusPBImpl(p);
} }
@ -104,6 +130,11 @@ public class NodeHeartbeatRequestPBImpl extends ProtoBase<NodeHeartbeatRequestPr
return ((NodeStatusPBImpl)t).getProto(); return ((NodeStatusPBImpl)t).getProto();
} }
private MasterKeyPBImpl convertFromProtoFormat(MasterKeyProto p) {
return new MasterKeyPBImpl(p);
}
private MasterKeyProto convertToProtoFormat(MasterKey t) {
return ((MasterKeyPBImpl)t).getProto();
}
} }

View File

@ -35,6 +35,7 @@ message RegisterNodeManagerResponseProto {
message NodeHeartbeatRequestProto { message NodeHeartbeatRequestProto {
optional NodeStatusProto node_status = 1; optional NodeStatusProto node_status = 1;
optional MasterKeyProto last_known_master_key = 2;
} }
message NodeHeartbeatResponseProto { message NodeHeartbeatResponseProto {

View File

@ -31,6 +31,7 @@ import java.util.Random;
import org.apache.avro.AvroRuntimeException; import org.apache.avro.AvroRuntimeException;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.YarnException; import org.apache.hadoop.yarn.YarnException;
@ -111,10 +112,7 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
this.totalResource = recordFactory.newRecordInstance(Resource.class); this.totalResource = recordFactory.newRecordInstance(Resource.class);
this.totalResource.setMemory(memoryMb); this.totalResource.setMemory(memoryMb);
metrics.addResource(totalResource); metrics.addResource(totalResource);
this.tokenKeepAliveEnabled = this.tokenKeepAliveEnabled = isTokenKeepAliveEnabled(conf);
conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED)
&& isSecurityEnabled();
this.tokenRemovalDelayMs = this.tokenRemovalDelayMs =
conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS, conf.getInt(YarnConfiguration.RM_NM_EXPIRY_INTERVAL_MS,
YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS); YarnConfiguration.DEFAULT_RM_NM_EXPIRY_INTERVAL_MS);
@ -163,10 +161,17 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
return this.hasToRebootNode; return this.hasToRebootNode;
} }
protected boolean isSecurityEnabled() { private boolean isSecurityEnabled() {
return UserGroupInformation.isSecurityEnabled(); return UserGroupInformation.isSecurityEnabled();
} }
@Private
protected boolean isTokenKeepAliveEnabled(Configuration conf) {
return conf.getBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED,
YarnConfiguration.DEFAULT_LOG_AGGREGATION_ENABLED)
&& isSecurityEnabled();
}
protected ResourceTracker getRMClient() { protected ResourceTracker getRMClient() {
Configuration conf = getConfig(); Configuration conf = getConfig();
YarnRPC rpc = YarnRPC.create(conf); YarnRPC rpc = YarnRPC.create(conf);
@ -321,7 +326,11 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
NodeHeartbeatRequest request = recordFactory NodeHeartbeatRequest request = recordFactory
.newRecordInstance(NodeHeartbeatRequest.class); .newRecordInstance(NodeHeartbeatRequest.class);
request.setNodeStatus(nodeStatus); request.setNodeStatus(nodeStatus);
if (isSecurityEnabled()) {
request.setLastKnownMasterKey(NodeStatusUpdaterImpl.this.context
.getContainerTokenSecretManager().getCurrentKey());
}
HeartbeatResponse response = HeartbeatResponse response =
resourceTracker.nodeHeartbeat(request).getHeartbeatResponse(); resourceTracker.nodeHeartbeat(request).getHeartbeatResponse();

View File

@ -92,7 +92,6 @@ public class NMContainerTokenSecretManager extends
containerId.getApplicationAttemptId().getApplicationId(); containerId.getApplicationAttemptId().getApplicationId();
MasterKeyData masterKeyToUse = null; MasterKeyData masterKeyToUse = null;
if (this.previousMasterKey != null if (this.previousMasterKey != null
&& keyId == this.previousMasterKey.getMasterKey().getKeyId()) { && keyId == this.previousMasterKey.getMasterKey().getKeyId()) {
// A container-launch has come in with a token generated off the last // A container-launch has come in with a token generated off the last

View File

@ -261,7 +261,7 @@ public class TestNodeStatusUpdater {
} }
@Override @Override
protected boolean isSecurityEnabled() { protected boolean isTokenKeepAliveEnabled(Configuration conf) {
return true; return true;
} }
} }

View File

@ -159,7 +159,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
DelegationTokenRenewer tokenRenewer = createDelegationTokenRenewer(); DelegationTokenRenewer tokenRenewer = createDelegationTokenRenewer();
addService(tokenRenewer); addService(tokenRenewer);
this.containerTokenSecretManager = new RMContainerTokenSecretManager(conf); this.containerTokenSecretManager = createContainerTokenSecretManager(conf);
this.rmContext = this.rmContext =
new RMContextImpl(this.store, this.rmDispatcher, new RMContextImpl(this.store, this.rmDispatcher,
@ -231,6 +231,11 @@ public class ResourceManager extends CompositeService implements Recoverable {
super.init(conf); super.init(conf);
} }
protected RMContainerTokenSecretManager createContainerTokenSecretManager(
Configuration conf) {
return new RMContainerTokenSecretManager(conf);
}
protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() { protected EventHandler<SchedulerEvent> createSchedulerEventDispatcher() {
return new SchedulerEventDispatcher(this.scheduler); return new SchedulerEventDispatcher(this.scheduler);
} }

View File

@ -169,14 +169,14 @@ public class ResourceTrackerService extends AbstractService implements
return response; return response;
} }
MasterKey nextMasterKeyForNode = null;
if (isSecurityEnabled()) { if (isSecurityEnabled()) {
nextMasterKeyForNode = this.containerTokenSecretManager.getCurrentKey(); MasterKey nextMasterKeyForNode =
this.containerTokenSecretManager.getCurrentKey();
regResponse.setMasterKey(nextMasterKeyForNode); regResponse.setMasterKey(nextMasterKeyForNode);
} }
RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort, RMNode rmNode = new RMNodeImpl(nodeId, rmContext, host, cmPort, httpPort,
resolve(host), capability, nextMasterKeyForNode); resolve(host), capability);
RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode); RMNode oldNode = this.rmContext.getRMNodes().putIfAbsent(nodeId, rmNode);
if (oldNode == null) { if (oldNode == null) {
@ -266,17 +266,18 @@ public class ResourceTrackerService extends AbstractService implements
latestResponse.addAllApplicationsToCleanup(rmNode.getAppsToCleanup()); latestResponse.addAllApplicationsToCleanup(rmNode.getAppsToCleanup());
latestResponse.setNodeAction(NodeAction.NORMAL); latestResponse.setNodeAction(NodeAction.NORMAL);
MasterKey nextMasterKeyForNode = null;
// Check if node's masterKey needs to be updated and if the currentKey has // Check if node's masterKey needs to be updated and if the currentKey has
// roller over, send it across // roller over, send it across
if (isSecurityEnabled()) { if (isSecurityEnabled()) {
boolean shouldSendMasterKey = false; boolean shouldSendMasterKey = false;
MasterKey nodeKnownMasterKey = rmNode.getCurrentMasterKey();
nextMasterKeyForNode = this.containerTokenSecretManager.getNextKey(); MasterKey nextMasterKeyForNode =
this.containerTokenSecretManager.getNextKey();
if (nextMasterKeyForNode != null) { if (nextMasterKeyForNode != null) {
// nextMasterKeyForNode can be null if there is no outstanding key that // nextMasterKeyForNode can be null if there is no outstanding key that
// is in the activation period. // is in the activation period.
MasterKey nodeKnownMasterKey = request.getLastKnownMasterKey();
if (nodeKnownMasterKey.getKeyId() != nextMasterKeyForNode.getKeyId()) { if (nodeKnownMasterKey.getKeyId() != nextMasterKeyForNode.getKeyId()) {
shouldSendMasterKey = true; shouldSendMasterKey = true;
} }
@ -290,8 +291,7 @@ public class ResourceTrackerService extends AbstractService implements
this.rmContext.getDispatcher().getEventHandler().handle( this.rmContext.getDispatcher().getEventHandler().handle(
new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(), new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(),
remoteNodeStatus.getContainersStatuses(), remoteNodeStatus.getContainersStatuses(),
remoteNodeStatus.getKeepAliveApplications(), latestResponse, remoteNodeStatus.getKeepAliveApplications(), latestResponse));
nextMasterKeyForNode));
nodeHeartBeatResponse.setHeartbeatResponse(latestResponse); nodeHeartBeatResponse.setHeartbeatResponse(latestResponse);
return nodeHeartBeatResponse; return nodeHeartBeatResponse;

View File

@ -28,7 +28,6 @@ import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
/** /**
* Node managers information on available resources * Node managers information on available resources
@ -107,6 +106,4 @@ public interface RMNode {
public List<ApplicationId> getAppsToCleanup(); public List<ApplicationId> getAppsToCleanup();
public HeartbeatResponse getLastHeartBeatResponse(); public HeartbeatResponse getLastHeartBeatResponse();
public MasterKey getCurrentMasterKey();
} }

View File

@ -46,7 +46,6 @@ import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics; import org.apache.hadoop.yarn.server.resourcemanager.ClusterMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEvent; import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEventType; import org.apache.hadoop.yarn.server.resourcemanager.NodesListManagerEventType;
@ -105,8 +104,6 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
private HeartbeatResponse latestHeartBeatResponse = recordFactory private HeartbeatResponse latestHeartBeatResponse = recordFactory
.newRecordInstance(HeartbeatResponse.class); .newRecordInstance(HeartbeatResponse.class);
private MasterKey currentMasterKey;
private static final StateMachineFactory<RMNodeImpl, private static final StateMachineFactory<RMNodeImpl,
NodeState, NodeState,
RMNodeEventType, RMNodeEventType,
@ -167,8 +164,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
RMNodeEvent> stateMachine; RMNodeEvent> stateMachine;
public RMNodeImpl(NodeId nodeId, RMContext context, String hostName, public RMNodeImpl(NodeId nodeId, RMContext context, String hostName,
int cmPort, int httpPort, Node node, Resource capability, int cmPort, int httpPort, Node node, Resource capability) {
MasterKey masterKey) {
this.nodeId = nodeId; this.nodeId = nodeId;
this.context = context; this.context = context;
this.hostName = hostName; this.hostName = hostName;
@ -178,7 +174,6 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
this.nodeAddress = hostName + ":" + cmPort; this.nodeAddress = hostName + ":" + cmPort;
this.httpAddress = hostName + ":" + httpPort; this.httpAddress = hostName + ":" + httpPort;
this.node = node; this.node = node;
this.currentMasterKey = masterKey;
this.nodeHealthStatus.setIsNodeHealthy(true); this.nodeHealthStatus.setIsNodeHealthy(true);
this.nodeHealthStatus.setHealthReport("Healthy"); this.nodeHealthStatus.setHealthReport("Healthy");
this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis()); this.nodeHealthStatus.setLastHealthReportTime(System.currentTimeMillis());
@ -312,17 +307,6 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
this.readLock.unlock(); this.readLock.unlock();
} }
} }
@Override
public MasterKey getCurrentMasterKey() {
this.readLock.lock();
try {
return this.currentMasterKey;
} finally {
this.readLock.unlock();
}
}
public void handle(RMNodeEvent event) { public void handle(RMNodeEvent event) {
LOG.debug("Processing " + event.getNodeId() + " of type " + event.getType()); LOG.debug("Processing " + event.getNodeId() + " of type " + event.getType());
@ -500,7 +484,6 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
// Switch the last heartbeatresponse. // Switch the last heartbeatresponse.
rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse(); rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse();
rmNode.currentMasterKey = statusEvent.getCurrentMasterKey();
NodeHealthStatus remoteNodeHealthStatus = NodeHealthStatus remoteNodeHealthStatus =
statusEvent.getNodeHealthStatus(); statusEvent.getNodeHealthStatus();
@ -582,7 +565,6 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
// Switch the last heartbeatresponse. // Switch the last heartbeatresponse.
rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse(); rmNode.latestHeartBeatResponse = statusEvent.getLatestResponse();
rmNode.currentMasterKey = statusEvent.getCurrentMasterKey();
NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus();
rmNode.setNodeHealthStatus(remoteNodeHealthStatus); rmNode.setNodeHealthStatus(remoteNodeHealthStatus);
if (remoteNodeHealthStatus.getIsNodeHealthy()) { if (remoteNodeHealthStatus.getIsNodeHealthy()) {

View File

@ -25,7 +25,6 @@ import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
public class RMNodeStatusEvent extends RMNodeEvent { public class RMNodeStatusEvent extends RMNodeEvent {
@ -33,17 +32,15 @@ public class RMNodeStatusEvent extends RMNodeEvent {
private final List<ContainerStatus> containersCollection; private final List<ContainerStatus> containersCollection;
private final HeartbeatResponse latestResponse; private final HeartbeatResponse latestResponse;
private final List<ApplicationId> keepAliveAppIds; private final List<ApplicationId> keepAliveAppIds;
private final MasterKey currentMasterKey;
public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus,
List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds, List<ContainerStatus> collection, List<ApplicationId> keepAliveAppIds,
HeartbeatResponse latestResponse, MasterKey currentMasterKey) { HeartbeatResponse latestResponse) {
super(nodeId, RMNodeEventType.STATUS_UPDATE); super(nodeId, RMNodeEventType.STATUS_UPDATE);
this.nodeHealthStatus = nodeHealthStatus; this.nodeHealthStatus = nodeHealthStatus;
this.containersCollection = collection; this.containersCollection = collection;
this.keepAliveAppIds = keepAliveAppIds; this.keepAliveAppIds = keepAliveAppIds;
this.latestResponse = latestResponse; this.latestResponse = latestResponse;
this.currentMasterKey = currentMasterKey;
} }
public NodeHealthStatus getNodeHealthStatus() { public NodeHealthStatus getNodeHealthStatus() {
@ -61,8 +58,4 @@ public class RMNodeStatusEvent extends RMNodeEvent {
public List<ApplicationId> getKeepAliveAppIds() { public List<ApplicationId> getKeepAliveAppIds() {
return this.keepAliveAppIds; return this.keepAliveAppIds;
} }
public MasterKey getCurrentMasterKey() {
return this.currentMasterKey;
}
} }

View File

@ -89,7 +89,7 @@ public class RMContainerTokenSecretManager extends
* Creates a new master-key and sets it as the primary. * Creates a new master-key and sets it as the primary.
*/ */
@Private @Private
protected void rollMasterKey() { public void rollMasterKey() {
super.writeLock.lock(); super.writeLock.lock();
try { try {
LOG.info("Rolling master-key for container-tokens"); LOG.info("Rolling master-key for container-tokens");
@ -97,6 +97,9 @@ public class RMContainerTokenSecretManager extends
this.currentMasterKey = createNewMasterKey(); this.currentMasterKey = createNewMasterKey();
} else { } else {
this.nextMasterKey = createNewMasterKey(); this.nextMasterKey = createNewMasterKey();
LOG.info("Going to activate master-key with key-id "
+ this.nextMasterKey.getMasterKey().getKeyId() + " in "
+ this.activationDelay + "ms");
this.timer.schedule(new NextKeyActivator(), this.activationDelay); this.timer.schedule(new NextKeyActivator(), this.activationDelay);
} }
} finally { } finally {
@ -122,7 +125,7 @@ public class RMContainerTokenSecretManager extends
* Activate the new master-key * Activate the new master-key
*/ */
@Private @Private
protected void activateNextMasterKey() { public void activateNextMasterKey() {
super.writeLock.lock(); super.writeLock.lock();
try { try {
LOG.info("Activating next master key with id: " LOG.info("Activating next master key with id: "

View File

@ -35,7 +35,9 @@ import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeStatus; import org.apache.hadoop.yarn.server.api.records.NodeStatus;
import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
import org.apache.hadoop.yarn.util.BuilderUtils; import org.apache.hadoop.yarn.util.BuilderUtils;
import org.apache.hadoop.yarn.util.Records; import org.apache.hadoop.yarn.util.Records;
@ -46,8 +48,9 @@ public class MockNM {
private final int memory; private final int memory;
private final ResourceTrackerService resourceTracker; private final ResourceTrackerService resourceTracker;
private final int httpPort = 2; private final int httpPort = 2;
private MasterKey currentMasterKey;
MockNM(String nodeIdStr, int memory, ResourceTrackerService resourceTracker) { public MockNM(String nodeIdStr, int memory, ResourceTrackerService resourceTracker) {
this.memory = memory; this.memory = memory;
this.resourceTracker = resourceTracker; this.resourceTracker = resourceTracker;
String[] splits = nodeIdStr.split(":"); String[] splits = nodeIdStr.split(":");
@ -72,7 +75,7 @@ public class MockNM {
nodeHeartbeat(conts, true); nodeHeartbeat(conts, true);
} }
public NodeId registerNode() throws Exception { public RegistrationResponse registerNode() throws Exception {
RegisterNodeManagerRequest req = Records.newRecord( RegisterNodeManagerRequest req = Records.newRecord(
RegisterNodeManagerRequest.class); RegisterNodeManagerRequest.class);
req.setNodeId(nodeId); req.setNodeId(nodeId);
@ -80,13 +83,15 @@ public class MockNM {
Resource resource = Records.newRecord(Resource.class); Resource resource = Records.newRecord(Resource.class);
resource.setMemory(memory); resource.setMemory(memory);
req.setResource(resource); req.setResource(resource);
resourceTracker.registerNodeManager(req); RegistrationResponse registrationResponse =
return nodeId; resourceTracker.registerNodeManager(req).getRegistrationResponse();
this.currentMasterKey = registrationResponse.getMasterKey();
return registrationResponse;
} }
public HeartbeatResponse nodeHeartbeat(boolean b) throws Exception { public HeartbeatResponse nodeHeartbeat(boolean isHealthy) throws Exception {
return nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(), return nodeHeartbeat(new HashMap<ApplicationId, List<ContainerStatus>>(),
b, ++responseId); isHealthy, ++responseId);
} }
public HeartbeatResponse nodeHeartbeat(ApplicationAttemptId attemptId, public HeartbeatResponse nodeHeartbeat(ApplicationAttemptId attemptId,
@ -123,7 +128,15 @@ public class MockNM {
healthStatus.setLastHealthReportTime(1); healthStatus.setLastHealthReportTime(1);
status.setNodeHealthStatus(healthStatus); status.setNodeHealthStatus(healthStatus);
req.setNodeStatus(status); req.setNodeStatus(status);
return resourceTracker.nodeHeartbeat(req).getHeartbeatResponse(); req.setLastKnownMasterKey(this.currentMasterKey);
HeartbeatResponse heartbeatResponse =
resourceTracker.nodeHeartbeat(req).getHeartbeatResponse();
MasterKey masterKeyFromRM = heartbeatResponse.getMasterKey();
this.currentMasterKey =
(masterKeyFromRM != null
&& masterKeyFromRM.getKeyId() != this.currentMasterKey.getKeyId()
? masterKeyFromRM : this.currentMasterKey);
return heartbeatResponse;
} }
} }

View File

@ -25,12 +25,11 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeHealthStatus; import org.apache.hadoop.yarn.api.records.NodeHealthStatus;
import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
@ -188,11 +187,6 @@ public class MockNodes {
public HeartbeatResponse getLastHeartBeatResponse() { public HeartbeatResponse getLastHeartBeatResponse() {
return null; return null;
} }
@Override
public MasterKey getCurrentMasterKey() {
return null;
}
}; };
private static RMNode buildRMNode(int rack, final Resource perNode, NodeState state, String httpAddr) { private static RMNode buildRMNode(int rack, final Resource perNode, NodeState state, String httpAddr) {

View File

@ -105,7 +105,7 @@ public class TestRMNodeTransitions {
new TestSchedulerEventDispatcher()); new TestSchedulerEventDispatcher());
NodeId nodeId = BuilderUtils.newNodeId("localhost", 0); NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null); node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null);
} }

View File

@ -54,6 +54,7 @@ import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
import org.xml.sax.InputSource; import org.xml.sax.InputSource;
import com.google.inject.Guice; import com.google.inject.Guice;
import com.google.inject.Injector; import com.google.inject.Injector;
import com.google.inject.servlet.GuiceServletContextListener; import com.google.inject.servlet.GuiceServletContextListener;
@ -145,7 +146,7 @@ public class TestRMWebServicesNodes extends JerseyTest {
nodeHealth.setHealthReport("test health report"); nodeHealth.setHealthReport("test health report");
nodeHealth.setIsNodeHealthy(false); nodeHealth.setIsNodeHealthy(false);
node.handle(new RMNodeStatusEvent(nm3.getNodeId(), nodeHealth, node.handle(new RMNodeStatusEvent(nm3.getNodeId(), nodeHealth,
new ArrayList<ContainerStatus>(), null, null, null)); new ArrayList<ContainerStatus>(), null, null));
rm.NMwaitForState(nm3.getNodeId(), NodeState.UNHEALTHY); rm.NMwaitForState(nm3.getNodeId(), NodeState.UNHEALTHY);
ClientResponse response = ClientResponse response =
@ -360,7 +361,7 @@ public class TestRMWebServicesNodes extends JerseyTest {
nodeHealth.setHealthReport("test health report"); nodeHealth.setHealthReport("test health report");
nodeHealth.setIsNodeHealthy(false); nodeHealth.setIsNodeHealthy(false);
node.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeHealth, node.handle(new RMNodeStatusEvent(nm1.getNodeId(), nodeHealth,
new ArrayList<ContainerStatus>(), null, null, null)); new ArrayList<ContainerStatus>(), null, null));
rm.NMwaitForState(nm1.getNodeId(), NodeState.UNHEALTHY); rm.NMwaitForState(nm1.getNodeId(), NodeState.UNHEALTHY);
ClientResponse response = r.path("ws").path("v1").path("cluster") ClientResponse response = r.path("ws").path("v1").path("cluster")

View File

@ -44,6 +44,12 @@
<groupId>org.apache.hadoop</groupId> <groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId> <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-resourcemanager</artifactId>
<type>test-jar</type>
<scope>test</scope>
</dependency>
</dependencies> </dependencies>
<build> <build>

View File

@ -46,7 +46,6 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.io.DataInputBuffer; import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Text;
import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.SecurityUtil;
@ -222,7 +221,7 @@ public class TestContainerManagerSecurity {
Resource modifiedResource = BuilderUtils.newResource(2048); Resource modifiedResource = BuilderUtils.newResource(2048);
ContainerTokenIdentifier modifiedIdentifier = new ContainerTokenIdentifier( ContainerTokenIdentifier modifiedIdentifier = new ContainerTokenIdentifier(
dummyIdentifier.getContainerID(), dummyIdentifier.getNmHostAddress(), dummyIdentifier.getContainerID(), dummyIdentifier.getNmHostAddress(),
modifiedResource, Long.MAX_VALUE, 0); modifiedResource, Long.MAX_VALUE, dummyIdentifier.getMasterKeyId());
Token<ContainerTokenIdentifier> modifiedToken = new Token<ContainerTokenIdentifier>( Token<ContainerTokenIdentifier> modifiedToken = new Token<ContainerTokenIdentifier>(
modifiedIdentifier.getBytes(), containerToken.getPassword().array(), modifiedIdentifier.getBytes(), containerToken.getPassword().array(),
new Text(containerToken.getKind()), new Text(containerToken new Text(containerToken.getKind()), new Text(containerToken
@ -250,19 +249,14 @@ public class TestContainerManagerSecurity {
+ "it will indicate RPC success"); + "it will indicate RPC success");
} catch (Exception e) { } catch (Exception e) {
Assert.assertEquals( Assert.assertEquals(
java.lang.reflect.UndeclaredThrowableException.class java.lang.reflect.UndeclaredThrowableException.class
.getCanonicalName(), e.getClass().getCanonicalName()); .getCanonicalName(), e.getClass().getCanonicalName());
Assert.assertEquals(RemoteException.class.getCanonicalName(), e
.getCause().getClass().getCanonicalName());
Assert.assertEquals(
"org.apache.hadoop.security.token.SecretManager$InvalidToken",
((RemoteException) e.getCause()).getClassName());
Assert.assertTrue(e Assert.assertTrue(e
.getCause() .getCause()
.getMessage() .getMessage()
.matches( .contains(
"Given Container container_\\d*_\\d*_\\d\\d_\\d*" "DIGEST-MD5: digest response format violation. "
+ " seems to have an illegally generated token.")); + "Mismatched response."));
} }
return null; return null;
} }

View File

@ -0,0 +1,120 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server;
import java.io.IOException;
import junit.framework.Assert;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.DrainDispatcher;
import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.RegistrationResponse;
import org.apache.hadoop.yarn.server.resourcemanager.MockNM;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
import org.junit.Test;
public class TestRMNMSecretKeys {
@Test
public void testNMUpdation() throws Exception {
YarnConfiguration conf = new YarnConfiguration();
conf.set(CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION,
"kerberos");
UserGroupInformation.setConfiguration(conf);
// Default rolling and activation intervals are large enough, no need to
// intervene
final DrainDispatcher dispatcher = new DrainDispatcher();
ResourceManager rm = new ResourceManager(null) {
@Override
protected void doSecureLogin() throws IOException {
// Do nothing.
}
@Override
protected Dispatcher createDispatcher() {
return dispatcher;
}
};
rm.init(conf);
rm.start();
MockNM nm = new MockNM("host:1234", 3072, rm.getResourceTrackerService());
RegistrationResponse registrationResponse = nm.registerNode();
MasterKey masterKey = registrationResponse.getMasterKey();
Assert.assertNotNull("Registration should cause a key-update!", masterKey);
dispatcher.await();
HeartbeatResponse response = nm.nodeHeartbeat(true);
Assert.assertNull(
"First heartbeat after registration shouldn't get any key updates!",
response.getMasterKey());
dispatcher.await();
response = nm.nodeHeartbeat(true);
Assert
.assertNull(
"Even second heartbeat after registration shouldn't get any key updates!",
response.getMasterKey());
dispatcher.await();
// Let's force a roll-over
RMContainerTokenSecretManager secretManager =
rm.getRMContainerTokenSecretManager();
secretManager.rollMasterKey();
// Heartbeats after roll-over and before activation should be fine.
response = nm.nodeHeartbeat(true);
Assert.assertNotNull(
"Heartbeats after roll-over and before activation should not err out.",
response.getMasterKey());
Assert.assertEquals(
"Roll-over should have incremented the key-id only by one!",
masterKey.getKeyId() + 1, response.getMasterKey().getKeyId());
dispatcher.await();
response = nm.nodeHeartbeat(true);
Assert.assertNull(
"Second heartbeat after roll-over shouldn't get any key updates!",
response.getMasterKey());
dispatcher.await();
// Let's force activation
secretManager.activateNextMasterKey();
response = nm.nodeHeartbeat(true);
Assert.assertNull("Activation shouldn't cause any key updates!",
response.getMasterKey());
dispatcher.await();
response = nm.nodeHeartbeat(true);
Assert.assertNull(
"Even second heartbeat after activation shouldn't get any key updates!",
response.getMasterKey());
dispatcher.await();
rm.stop();
}
}