YARN-6102. RMActiveService context to be updated with new RMContext on failover. Contributed by Rohith Sharma K S.
This commit is contained in:
parent
db7c5636b6
commit
a657472b42
|
@ -57,7 +57,7 @@ public class ActiveStandbyElectorBasedElectorService extends AbstractService
|
||||||
new HAServiceProtocol.StateChangeRequestInfo(
|
new HAServiceProtocol.StateChangeRequestInfo(
|
||||||
HAServiceProtocol.RequestSource.REQUEST_BY_ZKFC);
|
HAServiceProtocol.RequestSource.REQUEST_BY_ZKFC);
|
||||||
|
|
||||||
private RMContext rmContext;
|
private ResourceManager rm;
|
||||||
|
|
||||||
private byte[] localActiveNodeInfo;
|
private byte[] localActiveNodeInfo;
|
||||||
private ActiveStandbyElector elector;
|
private ActiveStandbyElector elector;
|
||||||
|
@ -66,9 +66,9 @@ public class ActiveStandbyElectorBasedElectorService extends AbstractService
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
final Object zkDisconnectLock = new Object();
|
final Object zkDisconnectLock = new Object();
|
||||||
|
|
||||||
ActiveStandbyElectorBasedElectorService(RMContext rmContext) {
|
ActiveStandbyElectorBasedElectorService(ResourceManager rm) {
|
||||||
super(ActiveStandbyElectorBasedElectorService.class.getName());
|
super(ActiveStandbyElectorBasedElectorService.class.getName());
|
||||||
this.rmContext = rmContext;
|
this.rm = rm;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -140,7 +140,7 @@ public class ActiveStandbyElectorBasedElectorService extends AbstractService
|
||||||
cancelDisconnectTimer();
|
cancelDisconnectTimer();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
rmContext.getRMAdminService().transitionToActive(req);
|
rm.getRMContext().getRMAdminService().transitionToActive(req);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new ServiceFailedException("RM could not transition to Active", e);
|
throw new ServiceFailedException("RM could not transition to Active", e);
|
||||||
}
|
}
|
||||||
|
@ -151,7 +151,7 @@ public class ActiveStandbyElectorBasedElectorService extends AbstractService
|
||||||
cancelDisconnectTimer();
|
cancelDisconnectTimer();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
rmContext.getRMAdminService().transitionToStandby(req);
|
rm.getRMContext().getRMAdminService().transitionToStandby(req);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error("RM could not transition to Standby", e);
|
LOG.error("RM could not transition to Standby", e);
|
||||||
}
|
}
|
||||||
|
@ -205,7 +205,7 @@ public class ActiveStandbyElectorBasedElectorService extends AbstractService
|
||||||
@SuppressWarnings(value = "unchecked")
|
@SuppressWarnings(value = "unchecked")
|
||||||
@Override
|
@Override
|
||||||
public void notifyFatalError(String errorMessage) {
|
public void notifyFatalError(String errorMessage) {
|
||||||
rmContext.getDispatcher().getEventHandler().handle(
|
rm.getRMContext().getDispatcher().getEventHandler().handle(
|
||||||
new RMFatalEvent(RMFatalEventType.EMBEDDED_ELECTOR_FAILED,
|
new RMFatalEvent(RMFatalEventType.EMBEDDED_ELECTOR_FAILED,
|
||||||
errorMessage));
|
errorMessage));
|
||||||
}
|
}
|
||||||
|
|
|
@ -102,7 +102,6 @@ public class AdminService extends CompositeService implements
|
||||||
|
|
||||||
private static final Log LOG = LogFactory.getLog(AdminService.class);
|
private static final Log LOG = LogFactory.getLog(AdminService.class);
|
||||||
|
|
||||||
private final RMContext rmContext;
|
|
||||||
private final ResourceManager rm;
|
private final ResourceManager rm;
|
||||||
private String rmId;
|
private String rmId;
|
||||||
|
|
||||||
|
@ -123,16 +122,16 @@ public class AdminService extends CompositeService implements
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
boolean isCentralizedNodeLabelConfiguration = true;
|
boolean isCentralizedNodeLabelConfiguration = true;
|
||||||
|
|
||||||
public AdminService(ResourceManager rm, RMContext rmContext) {
|
public AdminService(ResourceManager rm) {
|
||||||
super(AdminService.class.getName());
|
super(AdminService.class.getName());
|
||||||
this.rm = rm;
|
this.rm = rm;
|
||||||
this.rmContext = rmContext;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void serviceInit(Configuration conf) throws Exception {
|
public void serviceInit(Configuration conf) throws Exception {
|
||||||
autoFailoverEnabled =
|
autoFailoverEnabled =
|
||||||
rmContext.isHAEnabled() && HAUtil.isAutomaticFailoverEnabled(conf);
|
rm.getRMContext().isHAEnabled()
|
||||||
|
&& HAUtil.isAutomaticFailoverEnabled(conf);
|
||||||
|
|
||||||
masterServiceBindAddress = conf.getSocketAddr(
|
masterServiceBindAddress = conf.getSocketAddr(
|
||||||
YarnConfiguration.RM_BIND_HOST,
|
YarnConfiguration.RM_BIND_HOST,
|
||||||
|
@ -189,7 +188,7 @@ public class AdminService extends CompositeService implements
|
||||||
RMPolicyProvider.getInstance());
|
RMPolicyProvider.getInstance());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (rmContext.isHAEnabled()) {
|
if (rm.getRMContext().isHAEnabled()) {
|
||||||
RPC.setProtocolEngine(conf, HAServiceProtocolPB.class,
|
RPC.setProtocolEngine(conf, HAServiceProtocolPB.class,
|
||||||
ProtobufRpcEngine.class);
|
ProtobufRpcEngine.class);
|
||||||
|
|
||||||
|
@ -265,7 +264,7 @@ public class AdminService extends CompositeService implements
|
||||||
}
|
}
|
||||||
|
|
||||||
private synchronized boolean isRMActive() {
|
private synchronized boolean isRMActive() {
|
||||||
return HAServiceState.ACTIVE == rmContext.getHAServiceState();
|
return HAServiceState.ACTIVE == rm.getRMContext().getHAServiceState();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void throwStandbyException() throws StandbyException {
|
private void throwStandbyException() throws StandbyException {
|
||||||
|
@ -304,7 +303,7 @@ public class AdminService extends CompositeService implements
|
||||||
// call all refresh*s for active RM to get the updated configurations.
|
// call all refresh*s for active RM to get the updated configurations.
|
||||||
refreshAll();
|
refreshAll();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
rmContext
|
rm.getRMContext()
|
||||||
.getDispatcher()
|
.getDispatcher()
|
||||||
.getEventHandler()
|
.getEventHandler()
|
||||||
.handle(
|
.handle(
|
||||||
|
@ -363,7 +362,7 @@ public class AdminService extends CompositeService implements
|
||||||
@Override
|
@Override
|
||||||
public synchronized HAServiceStatus getServiceStatus() throws IOException {
|
public synchronized HAServiceStatus getServiceStatus() throws IOException {
|
||||||
checkAccess("getServiceState");
|
checkAccess("getServiceState");
|
||||||
HAServiceState haState = rmContext.getHAServiceState();
|
HAServiceState haState = rm.getRMContext().getHAServiceState();
|
||||||
HAServiceStatus ret = new HAServiceStatus(haState);
|
HAServiceStatus ret = new HAServiceStatus(haState);
|
||||||
if (isRMActive() || haState == HAServiceProtocol.HAServiceState.STANDBY) {
|
if (isRMActive() || haState == HAServiceProtocol.HAServiceState.STANDBY) {
|
||||||
ret.setReadyToBecomeActive();
|
ret.setReadyToBecomeActive();
|
||||||
|
@ -395,11 +394,12 @@ public class AdminService extends CompositeService implements
|
||||||
}
|
}
|
||||||
|
|
||||||
private void refreshQueues() throws IOException, YarnException {
|
private void refreshQueues() throws IOException, YarnException {
|
||||||
rmContext.getScheduler().reinitialize(getConfig(), this.rmContext);
|
rm.getRMContext().getScheduler().reinitialize(getConfig(),
|
||||||
|
this.rm.getRMContext());
|
||||||
// refresh the reservation system
|
// refresh the reservation system
|
||||||
ReservationSystem rSystem = rmContext.getReservationSystem();
|
ReservationSystem rSystem = rm.getRMContext().getReservationSystem();
|
||||||
if (rSystem != null) {
|
if (rSystem != null) {
|
||||||
rSystem.reinitialize(getConfig(), rmContext);
|
rSystem.reinitialize(getConfig(), rm.getRMContext());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,14 +418,14 @@ public class AdminService extends CompositeService implements
|
||||||
YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
|
YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
|
||||||
switch (request.getDecommissionType()) {
|
switch (request.getDecommissionType()) {
|
||||||
case NORMAL:
|
case NORMAL:
|
||||||
rmContext.getNodesListManager().refreshNodes(conf);
|
rm.getRMContext().getNodesListManager().refreshNodes(conf);
|
||||||
break;
|
break;
|
||||||
case GRACEFUL:
|
case GRACEFUL:
|
||||||
rmContext.getNodesListManager().refreshNodesGracefully(
|
rm.getRMContext().getNodesListManager().refreshNodesGracefully(
|
||||||
conf, request.getDecommissionTimeout());
|
conf, request.getDecommissionTimeout());
|
||||||
break;
|
break;
|
||||||
case FORCEFUL:
|
case FORCEFUL:
|
||||||
rmContext.getNodesListManager().refreshNodesForcefully();
|
rm.getRMContext().getNodesListManager().refreshNodesForcefully();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
||||||
|
@ -440,7 +440,7 @@ public class AdminService extends CompositeService implements
|
||||||
Configuration conf =
|
Configuration conf =
|
||||||
getConfiguration(new Configuration(false),
|
getConfiguration(new Configuration(false),
|
||||||
YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
|
YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
|
||||||
rmContext.getNodesListManager().refreshNodes(conf);
|
rm.getRMContext().getNodesListManager().refreshNodes(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -559,10 +559,11 @@ public class AdminService extends CompositeService implements
|
||||||
Configuration conf =
|
Configuration conf =
|
||||||
getConfiguration(new Configuration(false),
|
getConfiguration(new Configuration(false),
|
||||||
YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE);
|
YarnConfiguration.HADOOP_POLICY_CONFIGURATION_FILE);
|
||||||
rmContext.getClientRMService().refreshServiceAcls(conf, policyProvider);
|
rm.getRMContext().getClientRMService().refreshServiceAcls(conf,
|
||||||
rmContext.getApplicationMasterService().refreshServiceAcls(
|
policyProvider);
|
||||||
|
rm.getRMContext().getApplicationMasterService().refreshServiceAcls(
|
||||||
conf, policyProvider);
|
conf, policyProvider);
|
||||||
rmContext.getResourceTrackerService().refreshServiceAcls(
|
rm.getRMContext().getResourceTrackerService().refreshServiceAcls(
|
||||||
conf, policyProvider);
|
conf, policyProvider);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -601,7 +602,7 @@ public class AdminService extends CompositeService implements
|
||||||
// if any invalid nodes, throw exception instead of partially updating
|
// if any invalid nodes, throw exception instead of partially updating
|
||||||
// valid nodes.
|
// valid nodes.
|
||||||
for (NodeId nodeId : nodeIds) {
|
for (NodeId nodeId : nodeIds) {
|
||||||
RMNode node = this.rmContext.getRMNodes().get(nodeId);
|
RMNode node = this.rm.getRMContext().getRMNodes().get(nodeId);
|
||||||
if (node == null) {
|
if (node == null) {
|
||||||
LOG.error("Resource update get failed on all nodes due to change "
|
LOG.error("Resource update get failed on all nodes due to change "
|
||||||
+ "resource on an unrecognized node: " + nodeId);
|
+ "resource on an unrecognized node: " + nodeId);
|
||||||
|
@ -619,14 +620,14 @@ public class AdminService extends CompositeService implements
|
||||||
for (Map.Entry<NodeId, ResourceOption> entry : nodeResourceMap.entrySet()) {
|
for (Map.Entry<NodeId, ResourceOption> entry : nodeResourceMap.entrySet()) {
|
||||||
ResourceOption newResourceOption = entry.getValue();
|
ResourceOption newResourceOption = entry.getValue();
|
||||||
NodeId nodeId = entry.getKey();
|
NodeId nodeId = entry.getKey();
|
||||||
RMNode node = this.rmContext.getRMNodes().get(nodeId);
|
RMNode node = this.rm.getRMContext().getRMNodes().get(nodeId);
|
||||||
|
|
||||||
if (node == null) {
|
if (node == null) {
|
||||||
LOG.warn("Resource update get failed on an unrecognized node: " + nodeId);
|
LOG.warn("Resource update get failed on an unrecognized node: " + nodeId);
|
||||||
allSuccess = false;
|
allSuccess = false;
|
||||||
} else {
|
} else {
|
||||||
// update resource to RMNode
|
// update resource to RMNode
|
||||||
this.rmContext.getDispatcher().getEventHandler()
|
this.rm.getRMContext().getDispatcher().getEventHandler()
|
||||||
.handle(new RMNodeResourceUpdateEvent(nodeId, newResourceOption));
|
.handle(new RMNodeResourceUpdateEvent(nodeId, newResourceOption));
|
||||||
LOG.info("Update resource on node(" + node.getNodeID()
|
LOG.info("Update resource on node(" + node.getNodeID()
|
||||||
+ ") with resource(" + newResourceOption.toString() + ")");
|
+ ") with resource(" + newResourceOption.toString() + ")");
|
||||||
|
@ -661,7 +662,8 @@ public class AdminService extends CompositeService implements
|
||||||
DynamicResourceConfiguration newConf;
|
DynamicResourceConfiguration newConf;
|
||||||
|
|
||||||
InputStream drInputStream =
|
InputStream drInputStream =
|
||||||
this.rmContext.getConfigurationProvider().getConfigurationInputStream(
|
this.rm.getRMContext().getConfigurationProvider()
|
||||||
|
.getConfigurationInputStream(
|
||||||
configuration, YarnConfiguration.DR_CONFIGURATION_FILE);
|
configuration, YarnConfiguration.DR_CONFIGURATION_FILE);
|
||||||
|
|
||||||
if (drInputStream != null) {
|
if (drInputStream != null) {
|
||||||
|
@ -679,7 +681,7 @@ public class AdminService extends CompositeService implements
|
||||||
updateNodeResource(updateRequest);
|
updateNodeResource(updateRequest);
|
||||||
}
|
}
|
||||||
// refresh dynamic resource in ResourceTrackerService
|
// refresh dynamic resource in ResourceTrackerService
|
||||||
this.rmContext.getResourceTrackerService().
|
this.rm.getRMContext().getResourceTrackerService().
|
||||||
updateDynamicResourceConfiguration(newConf);
|
updateDynamicResourceConfiguration(newConf);
|
||||||
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
||||||
"AdminService");
|
"AdminService");
|
||||||
|
@ -692,7 +694,8 @@ public class AdminService extends CompositeService implements
|
||||||
private synchronized Configuration getConfiguration(Configuration conf,
|
private synchronized Configuration getConfiguration(Configuration conf,
|
||||||
String... confFileNames) throws YarnException, IOException {
|
String... confFileNames) throws YarnException, IOException {
|
||||||
for (String confFileName : confFileNames) {
|
for (String confFileName : confFileNames) {
|
||||||
InputStream confFileInputStream = this.rmContext.getConfigurationProvider()
|
InputStream confFileInputStream =
|
||||||
|
this.rm.getRMContext().getConfigurationProvider()
|
||||||
.getConfigurationInputStream(conf, confFileName);
|
.getConfigurationInputStream(conf, confFileName);
|
||||||
if (confFileInputStream != null) {
|
if (confFileInputStream != null) {
|
||||||
conf.addResource(confFileInputStream);
|
conf.addResource(confFileInputStream);
|
||||||
|
@ -746,7 +749,7 @@ public class AdminService extends CompositeService implements
|
||||||
AddToClusterNodeLabelsResponse response =
|
AddToClusterNodeLabelsResponse response =
|
||||||
recordFactory.newRecordInstance(AddToClusterNodeLabelsResponse.class);
|
recordFactory.newRecordInstance(AddToClusterNodeLabelsResponse.class);
|
||||||
try {
|
try {
|
||||||
rmContext.getNodeLabelManager()
|
rm.getRMContext().getNodeLabelManager()
|
||||||
.addToCluserNodeLabels(request.getNodeLabels());
|
.addToCluserNodeLabels(request.getNodeLabels());
|
||||||
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
||||||
"AdminService");
|
"AdminService");
|
||||||
|
@ -769,7 +772,8 @@ public class AdminService extends CompositeService implements
|
||||||
RemoveFromClusterNodeLabelsResponse response =
|
RemoveFromClusterNodeLabelsResponse response =
|
||||||
recordFactory.newRecordInstance(RemoveFromClusterNodeLabelsResponse.class);
|
recordFactory.newRecordInstance(RemoveFromClusterNodeLabelsResponse.class);
|
||||||
try {
|
try {
|
||||||
rmContext.getNodeLabelManager().removeFromClusterNodeLabels(request.getNodeLabels());
|
rm.getRMContext().getNodeLabelManager()
|
||||||
|
.removeFromClusterNodeLabels(request.getNodeLabels());
|
||||||
RMAuditLogger
|
RMAuditLogger
|
||||||
.logSuccess(user.getShortUserName(), operation, "AdminService");
|
.logSuccess(user.getShortUserName(), operation, "AdminService");
|
||||||
return response;
|
return response;
|
||||||
|
@ -805,19 +809,20 @@ public class AdminService extends CompositeService implements
|
||||||
boolean isKnown = false;
|
boolean isKnown = false;
|
||||||
// both active and inactive nodes are recognized as known nodes
|
// both active and inactive nodes are recognized as known nodes
|
||||||
if (requestedNode.getPort() != 0) {
|
if (requestedNode.getPort() != 0) {
|
||||||
if (rmContext.getRMNodes().containsKey(requestedNode)
|
if (rm.getRMContext().getRMNodes().containsKey(requestedNode) || rm
|
||||||
|| rmContext.getInactiveRMNodes().containsKey(requestedNode)) {
|
.getRMContext().getInactiveRMNodes().containsKey(requestedNode)) {
|
||||||
isKnown = true;
|
isKnown = true;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (NodeId knownNode : rmContext.getRMNodes().keySet()) {
|
for (NodeId knownNode : rm.getRMContext().getRMNodes().keySet()) {
|
||||||
if (knownNode.getHost().equals(requestedNode.getHost())) {
|
if (knownNode.getHost().equals(requestedNode.getHost())) {
|
||||||
isKnown = true;
|
isKnown = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!isKnown) {
|
if (!isKnown) {
|
||||||
for (NodeId knownNode : rmContext.getInactiveRMNodes().keySet()) {
|
for (NodeId knownNode : rm.getRMContext().getInactiveRMNodes()
|
||||||
|
.keySet()) {
|
||||||
if (knownNode.getHost().equals(requestedNode.getHost())) {
|
if (knownNode.getHost().equals(requestedNode.getHost())) {
|
||||||
isKnown = true;
|
isKnown = true;
|
||||||
break;
|
break;
|
||||||
|
@ -841,7 +846,7 @@ public class AdminService extends CompositeService implements
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
rmContext.getNodeLabelManager().replaceLabelsOnNode(
|
rm.getRMContext().getNodeLabelManager().replaceLabelsOnNode(
|
||||||
request.getNodeToLabels());
|
request.getNodeToLabels());
|
||||||
RMAuditLogger
|
RMAuditLogger
|
||||||
.logSuccess(user.getShortUserName(), operation, "AdminService");
|
.logSuccess(user.getShortUserName(), operation, "AdminService");
|
||||||
|
@ -878,7 +883,7 @@ public class AdminService extends CompositeService implements
|
||||||
|
|
||||||
checkRMStatus(user.getShortUserName(), operation, msg);
|
checkRMStatus(user.getShortUserName(), operation, msg);
|
||||||
|
|
||||||
Set<NodeId> decommissioningNodes = rmContext.getNodesListManager()
|
Set<NodeId> decommissioningNodes = rm.getRMContext().getNodesListManager()
|
||||||
.checkForDecommissioningNodes();
|
.checkForDecommissioningNodes();
|
||||||
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
RMAuditLogger.logSuccess(user.getShortUserName(), operation,
|
||||||
"AdminService");
|
"AdminService");
|
||||||
|
@ -914,6 +919,6 @@ public class AdminService extends CompositeService implements
|
||||||
getConfiguration(new Configuration(false),
|
getConfiguration(new Configuration(false),
|
||||||
YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
|
YarnConfiguration.YARN_SITE_CONFIGURATION_FILE);
|
||||||
|
|
||||||
rmContext.getScheduler().setClusterMaxPriority(conf);
|
rm.getRMContext().getScheduler().setClusterMaxPriority(conf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,14 +45,12 @@ public class CuratorBasedElectorService extends AbstractService
|
||||||
LogFactory.getLog(CuratorBasedElectorService.class);
|
LogFactory.getLog(CuratorBasedElectorService.class);
|
||||||
private LeaderLatch leaderLatch;
|
private LeaderLatch leaderLatch;
|
||||||
private CuratorFramework curator;
|
private CuratorFramework curator;
|
||||||
private RMContext rmContext;
|
|
||||||
private String latchPath;
|
private String latchPath;
|
||||||
private String rmId;
|
private String rmId;
|
||||||
private ResourceManager rm;
|
private ResourceManager rm;
|
||||||
|
|
||||||
public CuratorBasedElectorService(RMContext rmContext, ResourceManager rm) {
|
public CuratorBasedElectorService(ResourceManager rm) {
|
||||||
super(CuratorBasedElectorService.class.getName());
|
super(CuratorBasedElectorService.class.getName());
|
||||||
this.rmContext = rmContext;
|
|
||||||
this.rm = rm;
|
this.rm = rm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -102,7 +100,8 @@ public class CuratorBasedElectorService extends AbstractService
|
||||||
public void isLeader() {
|
public void isLeader() {
|
||||||
LOG.info(rmId + "is elected leader, transitioning to active");
|
LOG.info(rmId + "is elected leader, transitioning to active");
|
||||||
try {
|
try {
|
||||||
rmContext.getRMAdminService().transitionToActive(
|
rm.getRMContext().getRMAdminService()
|
||||||
|
.transitionToActive(
|
||||||
new HAServiceProtocol.StateChangeRequestInfo(
|
new HAServiceProtocol.StateChangeRequestInfo(
|
||||||
HAServiceProtocol.RequestSource.REQUEST_BY_ZKFC));
|
HAServiceProtocol.RequestSource.REQUEST_BY_ZKFC));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -123,7 +122,8 @@ public class CuratorBasedElectorService extends AbstractService
|
||||||
public void notLeader() {
|
public void notLeader() {
|
||||||
LOG.info(rmId + " relinquish leadership");
|
LOG.info(rmId + " relinquish leadership");
|
||||||
try {
|
try {
|
||||||
rmContext.getRMAdminService().transitionToStandby(
|
rm.getRMContext().getRMAdminService()
|
||||||
|
.transitionToStandby(
|
||||||
new HAServiceProtocol.StateChangeRequestInfo(
|
new HAServiceProtocol.StateChangeRequestInfo(
|
||||||
HAServiceProtocol.RequestSource.REQUEST_BY_ZKFC));
|
HAServiceProtocol.RequestSource.REQUEST_BY_ZKFC));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -30,8 +30,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
|
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
|
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMDelegatedNodeLabelsUpdater;
|
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMDelegatedNodeLabelsUpdater;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.placement.PlacementManager;
|
||||||
|
@ -44,6 +42,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.monitor.RMAppLifetime
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.distributed.QueueLimitCalculator;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
|
import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
|
import org.apache.hadoop.yarn.server.resourcemanager.security.ClientToAMTokenSecretManagerInRM;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer;
|
import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRenewer;
|
||||||
|
@ -106,6 +105,7 @@ public class RMActiveServiceContext {
|
||||||
private PlacementManager queuePlacementManager = null;
|
private PlacementManager queuePlacementManager = null;
|
||||||
|
|
||||||
private RMAppLifetimeMonitor rmAppLifetimeMonitor;
|
private RMAppLifetimeMonitor rmAppLifetimeMonitor;
|
||||||
|
private QueueLimitCalculator queueLimitCalculator;
|
||||||
|
|
||||||
public RMActiveServiceContext() {
|
public RMActiveServiceContext() {
|
||||||
queuePlacementManager = new PlacementManager();
|
queuePlacementManager = new PlacementManager();
|
||||||
|
@ -469,4 +469,17 @@ public class RMActiveServiceContext {
|
||||||
public RMAppLifetimeMonitor getRMAppLifetimeMonitor() {
|
public RMAppLifetimeMonitor getRMAppLifetimeMonitor() {
|
||||||
return this.rmAppLifetimeMonitor;
|
return this.rmAppLifetimeMonitor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public QueueLimitCalculator getNodeManagerQueueLimitCalculator() {
|
||||||
|
return this.queueLimitCalculator;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public void setContainerQueueLimitCalculator(
|
||||||
|
QueueLimitCalculator limitCalculator) {
|
||||||
|
this.queueLimitCalculator = limitCalculator;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,6 @@ import java.util.concurrent.ConcurrentMap;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol;
|
|
||||||
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||||
import org.apache.hadoop.yarn.LocalConfigurationProvider;
|
import org.apache.hadoop.yarn.LocalConfigurationProvider;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -56,37 +55,39 @@ import org.apache.hadoop.yarn.util.Clock;
|
||||||
|
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RMContextImpl class holds two services context.
|
||||||
|
* <ul>
|
||||||
|
* <li>serviceContext : These services called as <b>Always On</b> services.
|
||||||
|
* Services that need to run always irrespective of the HA state of the RM.</li>
|
||||||
|
* <li>activeServiceCotext : Active services context. Services that need to run
|
||||||
|
* only on the Active RM.</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* <b>Note:</b> If any new service to be added to context, add it to a right
|
||||||
|
* context as per above description.
|
||||||
|
*/
|
||||||
public class RMContextImpl implements RMContext {
|
public class RMContextImpl implements RMContext {
|
||||||
|
|
||||||
private Dispatcher rmDispatcher;
|
/**
|
||||||
|
* RM service contexts which runs through out RM life span. These are created
|
||||||
private boolean isHAEnabled;
|
* once during start of RM.
|
||||||
|
*/
|
||||||
private HAServiceState haServiceState =
|
private RMServiceContext serviceContext;
|
||||||
HAServiceProtocol.HAServiceState.INITIALIZING;
|
|
||||||
|
|
||||||
private AdminService adminService;
|
|
||||||
|
|
||||||
private ConfigurationProvider configurationProvider;
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RM Active service context. This will be recreated for every transition from
|
||||||
|
* ACTIVE->STANDBY.
|
||||||
|
*/
|
||||||
private RMActiveServiceContext activeServiceContext;
|
private RMActiveServiceContext activeServiceContext;
|
||||||
|
|
||||||
private Configuration yarnConfiguration;
|
|
||||||
|
|
||||||
private RMApplicationHistoryWriter rmApplicationHistoryWriter;
|
|
||||||
private SystemMetricsPublisher systemMetricsPublisher;
|
|
||||||
private EmbeddedElector elector;
|
|
||||||
|
|
||||||
private QueueLimitCalculator queueLimitCalculator;
|
|
||||||
|
|
||||||
private final Object haServiceStateLock = new Object();
|
|
||||||
|
|
||||||
private ResourceManager resourceManager;
|
|
||||||
/**
|
/**
|
||||||
* Default constructor. To be used in conjunction with setter methods for
|
* Default constructor. To be used in conjunction with setter methods for
|
||||||
* individual fields.
|
* individual fields.
|
||||||
*/
|
*/
|
||||||
public RMContextImpl() {
|
public RMContextImpl() {
|
||||||
|
this.serviceContext = new RMServiceContext();
|
||||||
|
this.activeServiceContext = new RMActiveServiceContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
@ -137,19 +138,143 @@ public class RMContextImpl implements RMContext {
|
||||||
clientToAMTokenSecretManager, null);
|
clientToAMTokenSecretManager, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
public Dispatcher getDispatcher() {
|
* RM service contexts which runs through out JVM life span. These are created
|
||||||
return this.rmDispatcher;
|
* once during start of RM.
|
||||||
|
* @return serviceContext of RM
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public RMServiceContext getServiceContext() {
|
||||||
|
return serviceContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <b>Note:</b> setting service context clears all services embedded with it.
|
||||||
|
* @param context rm service context
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public void setServiceContext(RMServiceContext context) {
|
||||||
|
this.serviceContext = context;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setLeaderElectorService(EmbeddedElector elector) {
|
public ResourceManager getResourceManager() {
|
||||||
this.elector = elector;
|
return serviceContext.getResourceManager();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setResourceManager(ResourceManager rm) {
|
||||||
|
serviceContext.setResourceManager(rm);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public EmbeddedElector getLeaderElectorService() {
|
public EmbeddedElector getLeaderElectorService() {
|
||||||
return this.elector;
|
return serviceContext.getLeaderElectorService();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setLeaderElectorService(EmbeddedElector elector) {
|
||||||
|
serviceContext.setLeaderElectorService(elector);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Dispatcher getDispatcher() {
|
||||||
|
return serviceContext.getDispatcher();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setDispatcher(Dispatcher dispatcher) {
|
||||||
|
serviceContext.setDispatcher(dispatcher);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AdminService getRMAdminService() {
|
||||||
|
return serviceContext.getRMAdminService();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setRMAdminService(AdminService adminService) {
|
||||||
|
serviceContext.setRMAdminService(adminService);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isHAEnabled() {
|
||||||
|
return serviceContext.isHAEnabled();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setHAEnabled(boolean isHAEnabled) {
|
||||||
|
serviceContext.setHAEnabled(isHAEnabled);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HAServiceState getHAServiceState() {
|
||||||
|
return serviceContext.getHAServiceState();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setHAServiceState(HAServiceState serviceState) {
|
||||||
|
serviceContext.setHAServiceState(serviceState);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public RMApplicationHistoryWriter getRMApplicationHistoryWriter() {
|
||||||
|
return serviceContext.getRMApplicationHistoryWriter();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setRMApplicationHistoryWriter(
|
||||||
|
RMApplicationHistoryWriter rmApplicationHistoryWriter) {
|
||||||
|
serviceContext.setRMApplicationHistoryWriter(rmApplicationHistoryWriter);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SystemMetricsPublisher getSystemMetricsPublisher() {
|
||||||
|
return serviceContext.getSystemMetricsPublisher();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setSystemMetricsPublisher(
|
||||||
|
SystemMetricsPublisher metricsPublisher) {
|
||||||
|
serviceContext.setSystemMetricsPublisher(metricsPublisher);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ConfigurationProvider getConfigurationProvider() {
|
||||||
|
return serviceContext.getConfigurationProvider();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConfigurationProvider(
|
||||||
|
ConfigurationProvider configurationProvider) {
|
||||||
|
serviceContext.setConfigurationProvider(configurationProvider);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Configuration getYarnConfiguration() {
|
||||||
|
return serviceContext.getYarnConfiguration();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setYarnConfiguration(Configuration yarnConfiguration) {
|
||||||
|
serviceContext.setYarnConfiguration(yarnConfiguration);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getHAZookeeperConnectionState() {
|
||||||
|
return serviceContext.getHAZookeeperConnectionState();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ==========================================================================
|
||||||
|
/**
|
||||||
|
* RM Active service context. This will be recreated for every transition from
|
||||||
|
* ACTIVE to STANDBY.
|
||||||
|
* @return activeServiceContext of active services
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public RMActiveServiceContext getActiveServiceContext() {
|
||||||
|
return activeServiceContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
void setActiveServiceContext(RMActiveServiceContext activeServiceContext) {
|
||||||
|
this.activeServiceContext = activeServiceContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -227,11 +352,6 @@ public class RMContextImpl implements RMContext {
|
||||||
return activeServiceContext.getClientToAMTokenSecretManager();
|
return activeServiceContext.getClientToAMTokenSecretManager();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public AdminService getRMAdminService() {
|
|
||||||
return this.adminService;
|
|
||||||
}
|
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public void setStateStore(RMStateStore store) {
|
public void setStateStore(RMStateStore store) {
|
||||||
activeServiceContext.setStateStore(store);
|
activeServiceContext.setStateStore(store);
|
||||||
|
@ -252,24 +372,6 @@ public class RMContextImpl implements RMContext {
|
||||||
return activeServiceContext.getResourceTrackerService();
|
return activeServiceContext.getResourceTrackerService();
|
||||||
}
|
}
|
||||||
|
|
||||||
void setHAEnabled(boolean isHAEnabled) {
|
|
||||||
this.isHAEnabled = isHAEnabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setHAServiceState(HAServiceState serviceState) {
|
|
||||||
synchronized (haServiceStateLock) {
|
|
||||||
this.haServiceState = serviceState;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void setDispatcher(Dispatcher dispatcher) {
|
|
||||||
this.rmDispatcher = dispatcher;
|
|
||||||
}
|
|
||||||
|
|
||||||
void setRMAdminService(AdminService adminService) {
|
|
||||||
this.adminService = adminService;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setClientRMService(ClientRMService clientRMService) {
|
public void setClientRMService(ClientRMService clientRMService) {
|
||||||
activeServiceContext.setClientRMService(clientRMService);
|
activeServiceContext.setClientRMService(clientRMService);
|
||||||
|
@ -347,18 +449,6 @@ public class RMContextImpl implements RMContext {
|
||||||
activeServiceContext.setResourceTrackerService(resourceTrackerService);
|
activeServiceContext.setResourceTrackerService(resourceTrackerService);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isHAEnabled() {
|
|
||||||
return isHAEnabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public HAServiceState getHAServiceState() {
|
|
||||||
synchronized (haServiceStateLock) {
|
|
||||||
return haServiceState;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setWorkPreservingRecoveryEnabled(boolean enabled) {
|
public void setWorkPreservingRecoveryEnabled(boolean enabled) {
|
||||||
activeServiceContext.setWorkPreservingRecoveryEnabled(enabled);
|
activeServiceContext.setWorkPreservingRecoveryEnabled(enabled);
|
||||||
}
|
}
|
||||||
|
@ -368,39 +458,6 @@ public class RMContextImpl implements RMContext {
|
||||||
return activeServiceContext.isWorkPreservingRecoveryEnabled();
|
return activeServiceContext.isWorkPreservingRecoveryEnabled();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public RMApplicationHistoryWriter getRMApplicationHistoryWriter() {
|
|
||||||
return this.rmApplicationHistoryWriter;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setSystemMetricsPublisher(
|
|
||||||
SystemMetricsPublisher systemMetricsPublisher) {
|
|
||||||
this.systemMetricsPublisher = systemMetricsPublisher;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public SystemMetricsPublisher getSystemMetricsPublisher() {
|
|
||||||
return this.systemMetricsPublisher;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setRMApplicationHistoryWriter(
|
|
||||||
RMApplicationHistoryWriter rmApplicationHistoryWriter) {
|
|
||||||
this.rmApplicationHistoryWriter = rmApplicationHistoryWriter;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ConfigurationProvider getConfigurationProvider() {
|
|
||||||
return this.configurationProvider;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setConfigurationProvider(
|
|
||||||
ConfigurationProvider configurationProvider) {
|
|
||||||
this.configurationProvider = configurationProvider;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getEpoch() {
|
public long getEpoch() {
|
||||||
return activeServiceContext.getEpoch();
|
return activeServiceContext.getEpoch();
|
||||||
|
@ -450,27 +507,6 @@ public class RMContextImpl implements RMContext {
|
||||||
return activeServiceContext.getSystemCredentialsForApps();
|
return activeServiceContext.getSystemCredentialsForApps();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Private
|
|
||||||
@Unstable
|
|
||||||
public RMActiveServiceContext getActiveServiceContext() {
|
|
||||||
return activeServiceContext;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Private
|
|
||||||
@Unstable
|
|
||||||
void setActiveServiceContext(RMActiveServiceContext activeServiceContext) {
|
|
||||||
this.activeServiceContext = activeServiceContext;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Configuration getYarnConfiguration() {
|
|
||||||
return this.yarnConfiguration;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setYarnConfiguration(Configuration yarnConfiguration) {
|
|
||||||
this.yarnConfiguration=yarnConfiguration;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public PlacementManager getQueuePlacementManager() {
|
public PlacementManager getQueuePlacementManager() {
|
||||||
return this.activeServiceContext.getQueuePlacementManager();
|
return this.activeServiceContext.getQueuePlacementManager();
|
||||||
|
@ -483,12 +519,12 @@ public class RMContextImpl implements RMContext {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public QueueLimitCalculator getNodeManagerQueueLimitCalculator() {
|
public QueueLimitCalculator getNodeManagerQueueLimitCalculator() {
|
||||||
return this.queueLimitCalculator;
|
return activeServiceContext.getNodeManagerQueueLimitCalculator();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setContainerQueueLimitCalculator(
|
public void setContainerQueueLimitCalculator(
|
||||||
QueueLimitCalculator limitCalculator) {
|
QueueLimitCalculator limitCalculator) {
|
||||||
this.queueLimitCalculator = limitCalculator;
|
activeServiceContext.setContainerQueueLimitCalculator(limitCalculator);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -502,21 +538,5 @@ public class RMContextImpl implements RMContext {
|
||||||
return this.activeServiceContext.getRMAppLifetimeMonitor();
|
return this.activeServiceContext.getRMAppLifetimeMonitor();
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getHAZookeeperConnectionState() {
|
// Note: Read java doc before adding any services over here.
|
||||||
if (elector == null) {
|
|
||||||
return "Could not find leader elector. Verify both HA and automatic " +
|
|
||||||
"failover are enabled.";
|
|
||||||
} else {
|
|
||||||
return elector.getZookeeperConnectionState();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ResourceManager getResourceManager() {
|
|
||||||
return resourceManager;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void setResourceManager(ResourceManager rm) {
|
|
||||||
this.resourceManager = rm;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,151 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
|
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||||
|
import org.apache.hadoop.yarn.conf.ConfigurationProvider;
|
||||||
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* RMServiceContext class maintains "Always On" services. Services that need to
|
||||||
|
* run always irrespective of the HA state of the RM. This is created during
|
||||||
|
* initialization of RMContextImpl.
|
||||||
|
* <p>
|
||||||
|
* <b>Note:</b> If any services to be added in this class, make sure service
|
||||||
|
* will be running always irrespective of the HA state of the RM
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public class RMServiceContext {
|
||||||
|
|
||||||
|
private Dispatcher rmDispatcher;
|
||||||
|
private boolean isHAEnabled;
|
||||||
|
private HAServiceState haServiceState =
|
||||||
|
HAServiceProtocol.HAServiceState.INITIALIZING;
|
||||||
|
private AdminService adminService;
|
||||||
|
private ConfigurationProvider configurationProvider;
|
||||||
|
private Configuration yarnConfiguration;
|
||||||
|
private RMApplicationHistoryWriter rmApplicationHistoryWriter;
|
||||||
|
private SystemMetricsPublisher systemMetricsPublisher;
|
||||||
|
private EmbeddedElector elector;
|
||||||
|
private final Object haServiceStateLock = new Object();
|
||||||
|
private ResourceManager resourceManager;
|
||||||
|
|
||||||
|
public ResourceManager getResourceManager() {
|
||||||
|
return resourceManager;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setResourceManager(ResourceManager rm) {
|
||||||
|
this.resourceManager = rm;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ConfigurationProvider getConfigurationProvider() {
|
||||||
|
return this.configurationProvider;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setConfigurationProvider(
|
||||||
|
ConfigurationProvider configurationProvider) {
|
||||||
|
this.configurationProvider = configurationProvider;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Dispatcher getDispatcher() {
|
||||||
|
return this.rmDispatcher;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setDispatcher(Dispatcher dispatcher) {
|
||||||
|
this.rmDispatcher = dispatcher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EmbeddedElector getLeaderElectorService() {
|
||||||
|
return this.elector;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setLeaderElectorService(EmbeddedElector embeddedElector) {
|
||||||
|
this.elector = embeddedElector;
|
||||||
|
}
|
||||||
|
|
||||||
|
public AdminService getRMAdminService() {
|
||||||
|
return this.adminService;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setRMAdminService(AdminService service) {
|
||||||
|
this.adminService = service;
|
||||||
|
}
|
||||||
|
|
||||||
|
void setHAEnabled(boolean rmHAEnabled) {
|
||||||
|
this.isHAEnabled = rmHAEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isHAEnabled() {
|
||||||
|
return isHAEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HAServiceState getHAServiceState() {
|
||||||
|
synchronized (haServiceStateLock) {
|
||||||
|
return haServiceState;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void setHAServiceState(HAServiceState serviceState) {
|
||||||
|
synchronized (haServiceStateLock) {
|
||||||
|
this.haServiceState = serviceState;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public RMApplicationHistoryWriter getRMApplicationHistoryWriter() {
|
||||||
|
return this.rmApplicationHistoryWriter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setRMApplicationHistoryWriter(
|
||||||
|
RMApplicationHistoryWriter applicationHistoryWriter) {
|
||||||
|
this.rmApplicationHistoryWriter = applicationHistoryWriter;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSystemMetricsPublisher(
|
||||||
|
SystemMetricsPublisher metricsPublisher) {
|
||||||
|
this.systemMetricsPublisher = metricsPublisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SystemMetricsPublisher getSystemMetricsPublisher() {
|
||||||
|
return this.systemMetricsPublisher;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Configuration getYarnConfiguration() {
|
||||||
|
return this.yarnConfiguration;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setYarnConfiguration(Configuration yarnConfiguration) {
|
||||||
|
this.yarnConfiguration = yarnConfiguration;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getHAZookeeperConnectionState() {
|
||||||
|
if (elector == null) {
|
||||||
|
return "Could not find leader elector. Verify both HA and automatic "
|
||||||
|
+ "failover are enabled.";
|
||||||
|
} else {
|
||||||
|
return elector.getZookeeperConnectionState();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -317,9 +317,9 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
YarnConfiguration.DEFAULT_CURATOR_LEADER_ELECTOR_ENABLED);
|
YarnConfiguration.DEFAULT_CURATOR_LEADER_ELECTOR_ENABLED);
|
||||||
if (curatorEnabled) {
|
if (curatorEnabled) {
|
||||||
this.curator = createAndStartCurator(conf);
|
this.curator = createAndStartCurator(conf);
|
||||||
elector = new CuratorBasedElectorService(rmContext, this);
|
elector = new CuratorBasedElectorService(this);
|
||||||
} else {
|
} else {
|
||||||
elector = new ActiveStandbyElectorBasedElectorService(rmContext);
|
elector = new ActiveStandbyElectorBasedElectorService(this);
|
||||||
}
|
}
|
||||||
return elector;
|
return elector;
|
||||||
}
|
}
|
||||||
|
@ -510,7 +510,6 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
private ApplicationMasterLauncher applicationMasterLauncher;
|
private ApplicationMasterLauncher applicationMasterLauncher;
|
||||||
private ContainerAllocationExpirer containerAllocationExpirer;
|
private ContainerAllocationExpirer containerAllocationExpirer;
|
||||||
private ResourceManager rm;
|
private ResourceManager rm;
|
||||||
private RMActiveServiceContext activeServiceContext;
|
|
||||||
private boolean fromActive = false;
|
private boolean fromActive = false;
|
||||||
private StandByTransitionRunnable standByTransitionRunnable;
|
private StandByTransitionRunnable standByTransitionRunnable;
|
||||||
|
|
||||||
|
@ -523,9 +522,6 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
protected void serviceInit(Configuration configuration) throws Exception {
|
protected void serviceInit(Configuration configuration) throws Exception {
|
||||||
standByTransitionRunnable = new StandByTransitionRunnable();
|
standByTransitionRunnable = new StandByTransitionRunnable();
|
||||||
|
|
||||||
activeServiceContext = new RMActiveServiceContext();
|
|
||||||
rmContext.setActiveServiceContext(activeServiceContext);
|
|
||||||
|
|
||||||
conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
|
conf.setBoolean(Dispatcher.DISPATCHER_EXIT_ON_ERROR_KEY, true);
|
||||||
rmSecretManagerService = createRMSecretManagerService();
|
rmSecretManagerService = createRMSecretManagerService();
|
||||||
addService(rmSecretManagerService);
|
addService(rmSecretManagerService);
|
||||||
|
@ -1031,7 +1027,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
ClusterMetrics.destroy();
|
ClusterMetrics.destroy();
|
||||||
QueueMetrics.clearQueueMetrics();
|
QueueMetrics.clearQueueMetrics();
|
||||||
if (initialize) {
|
if (initialize) {
|
||||||
resetDispatcher();
|
resetRMContext();
|
||||||
createAndInitActiveServices(true);
|
createAndInitActiveServices(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1176,7 +1172,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
}
|
}
|
||||||
|
|
||||||
protected AdminService createAdminService() {
|
protected AdminService createAdminService() {
|
||||||
return new AdminService(this, rmContext);
|
return new AdminService(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected RMSecretManagerService createRMSecretManagerService() {
|
protected RMSecretManagerService createRMSecretManagerService() {
|
||||||
|
@ -1299,16 +1295,24 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
return dispatcher;
|
return dispatcher;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void resetDispatcher() {
|
private void resetRMContext() {
|
||||||
|
RMContextImpl rmContextImpl = new RMContextImpl();
|
||||||
|
// transfer service context to new RM service Context
|
||||||
|
rmContextImpl.setServiceContext(rmContext.getServiceContext());
|
||||||
|
|
||||||
|
// reset dispatcher
|
||||||
Dispatcher dispatcher = setupDispatcher();
|
Dispatcher dispatcher = setupDispatcher();
|
||||||
((Service)dispatcher).init(this.conf);
|
((Service) dispatcher).init(this.conf);
|
||||||
((Service)dispatcher).start();
|
((Service) dispatcher).start();
|
||||||
removeService((Service)rmDispatcher);
|
removeService((Service) rmDispatcher);
|
||||||
// Need to stop previous rmDispatcher before assigning new dispatcher
|
// Need to stop previous rmDispatcher before assigning new dispatcher
|
||||||
// otherwise causes "AsyncDispatcher event handler" thread leak
|
// otherwise causes "AsyncDispatcher event handler" thread leak
|
||||||
((Service) rmDispatcher).stop();
|
((Service) rmDispatcher).stop();
|
||||||
rmDispatcher = dispatcher;
|
rmDispatcher = dispatcher;
|
||||||
addIfService(rmDispatcher);
|
addIfService(rmDispatcher);
|
||||||
|
rmContextImpl.setDispatcher(dispatcher);
|
||||||
|
|
||||||
|
rmContext = rmContextImpl;
|
||||||
rmContext.setDispatcher(rmDispatcher);
|
rmContext.setDispatcher(rmDispatcher);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1055,7 +1055,7 @@ public class MockRM extends ResourceManager {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AdminService createAdminService() {
|
protected AdminService createAdminService() {
|
||||||
return new AdminService(this, getRMContext()) {
|
return new AdminService(this) {
|
||||||
@Override
|
@Override
|
||||||
protected void startServer() {
|
protected void startServer() {
|
||||||
// override to not start rpc handler
|
// override to not start rpc handler
|
||||||
|
|
|
@ -123,13 +123,15 @@ public class TestRMEmbeddedElector extends ClientBaseWithFixes {
|
||||||
throws IOException, InterruptedException {
|
throws IOException, InterruptedException {
|
||||||
AdminService as = mock(AdminService.class);
|
AdminService as = mock(AdminService.class);
|
||||||
RMContext rc = mock(RMContext.class);
|
RMContext rc = mock(RMContext.class);
|
||||||
|
ResourceManager rm = mock(ResourceManager.class);
|
||||||
Configuration myConf = new Configuration(conf);
|
Configuration myConf = new Configuration(conf);
|
||||||
|
|
||||||
myConf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, 50);
|
myConf.setInt(YarnConfiguration.RM_ZK_TIMEOUT_MS, 50);
|
||||||
|
when(rm.getRMContext()).thenReturn(rc);
|
||||||
when(rc.getRMAdminService()).thenReturn(as);
|
when(rc.getRMAdminService()).thenReturn(as);
|
||||||
|
|
||||||
ActiveStandbyElectorBasedElectorService
|
ActiveStandbyElectorBasedElectorService ees =
|
||||||
ees = new ActiveStandbyElectorBasedElectorService(rc);
|
new ActiveStandbyElectorBasedElectorService(rm);
|
||||||
ees.init(myConf);
|
ees.init(myConf);
|
||||||
|
|
||||||
ees.enterNeutralMode();
|
ees.enterNeutralMode();
|
||||||
|
@ -291,7 +293,7 @@ public class TestRMEmbeddedElector extends ClientBaseWithFixes {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected EmbeddedElector createEmbeddedElector() {
|
protected EmbeddedElector createEmbeddedElector() {
|
||||||
return new ActiveStandbyElectorBasedElectorService(getRMContext()) {
|
return new ActiveStandbyElectorBasedElectorService(this) {
|
||||||
@Override
|
@Override
|
||||||
public void becomeActive() throws
|
public void becomeActive() throws
|
||||||
ServiceFailedException {
|
ServiceFailedException {
|
||||||
|
|
|
@ -70,6 +70,7 @@ public class TestRMHA {
|
||||||
private Log LOG = LogFactory.getLog(TestRMHA.class);
|
private Log LOG = LogFactory.getLog(TestRMHA.class);
|
||||||
private Configuration configuration;
|
private Configuration configuration;
|
||||||
private MockRM rm = null;
|
private MockRM rm = null;
|
||||||
|
private MockNM nm = null;
|
||||||
private RMApp app = null;
|
private RMApp app = null;
|
||||||
private RMAppAttempt attempt = null;
|
private RMAppAttempt attempt = null;
|
||||||
private static final String STATE_ERR =
|
private static final String STATE_ERR =
|
||||||
|
@ -134,7 +135,7 @@ public class TestRMHA {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
rm.getNewAppId();
|
rm.getNewAppId();
|
||||||
rm.registerNode("127.0.0.1:1", 2048);
|
nm = rm.registerNode("127.0.0.1:1", 2048);
|
||||||
app = rm.submitApp(1024);
|
app = rm.submitApp(1024);
|
||||||
attempt = app.getCurrentAppAttempt();
|
attempt = app.getCurrentAppAttempt();
|
||||||
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.SCHEDULED);
|
rm.waitForState(attempt.getAppAttemptId(), RMAppAttemptState.SCHEDULED);
|
||||||
|
@ -549,6 +550,17 @@ public class TestRMHA {
|
||||||
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
|
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
|
||||||
assertEquals(1, rm.getRMContext().getRMNodes().size());
|
assertEquals(1, rm.getRMContext().getRMNodes().size());
|
||||||
assertEquals(1, rm.getRMContext().getRMApps().size());
|
assertEquals(1, rm.getRMContext().getRMApps().size());
|
||||||
|
Assert.assertNotNull("Node not registered", nm);
|
||||||
|
|
||||||
|
rm.adminService.transitionToStandby(requestInfo);
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkStandbyRMFunctionality();
|
||||||
|
// race condition causes to register/node heartbeat node even after service
|
||||||
|
// is stopping/stopped. New RMContext is being created on every transition
|
||||||
|
// to standby, so metrics should be 0 which indicates new context reference
|
||||||
|
// has taken.
|
||||||
|
nm.registerNode();
|
||||||
|
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
// 3. Create new RM
|
// 3. Create new RM
|
||||||
rm = new MockRM(conf, memStore) {
|
rm = new MockRM(conf, memStore) {
|
||||||
|
@ -590,7 +602,7 @@ public class TestRMHA {
|
||||||
rm = new MockRM(configuration) {
|
rm = new MockRM(configuration) {
|
||||||
@Override
|
@Override
|
||||||
protected AdminService createAdminService() {
|
protected AdminService createAdminService() {
|
||||||
return new AdminService(this, getRMContext()) {
|
return new AdminService(this) {
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
@Override
|
@Override
|
||||||
protected void setConfig(Configuration conf) {
|
protected void setConfig(Configuration conf) {
|
||||||
|
|
Loading…
Reference in New Issue