YARN-1027. Implement RMHAProtocolService (Karthik Kambatla via bikas)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1523750 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
59587d9fad
commit
ead3dcc3de
|
@ -29,6 +29,7 @@ Release 2.3.0 - UNRELEASED
|
||||||
YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza)
|
YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza)
|
||||||
YARN-1098. Separate out RM services into Always On and Active (Karthik
|
YARN-1098. Separate out RM services into Always On and Active (Karthik
|
||||||
Kambatla via bikas)
|
Kambatla via bikas)
|
||||||
|
YARN-1027. Implement RMHAProtocolService (Karthik Kambatla via bikas)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
|
|
|
@ -271,6 +271,11 @@ public class YarnConfiguration extends Configuration {
|
||||||
public static final String RECOVERY_ENABLED = RM_PREFIX + "recovery.enabled";
|
public static final String RECOVERY_ENABLED = RM_PREFIX + "recovery.enabled";
|
||||||
public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false;
|
public static final boolean DEFAULT_RM_RECOVERY_ENABLED = false;
|
||||||
|
|
||||||
|
/** HA related configs */
|
||||||
|
public static final String RM_HA_PREFIX = RM_PREFIX + "ha.";
|
||||||
|
public static final String RM_HA_ENABLED = RM_HA_PREFIX + "enabled";
|
||||||
|
public static final boolean DEFAULT_RM_HA_ENABLED = false;
|
||||||
|
|
||||||
/** The class to use as the persistent store.*/
|
/** The class to use as the persistent store.*/
|
||||||
public static final String RM_STORE = RM_PREFIX + "store.class";
|
public static final String RM_STORE = RM_PREFIX + "store.class";
|
||||||
|
|
||||||
|
|
|
@ -268,6 +268,14 @@
|
||||||
<!--value>hdfs://localhost:9000/rmstore</value-->
|
<!--value>hdfs://localhost:9000/rmstore</value-->
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<description>Enable RM high-availability. When enabled, the RM starts
|
||||||
|
in the Standby mode by default, and transitions to the Active mode when
|
||||||
|
prompted to.</description>
|
||||||
|
<name>yarn.resourcemanager.ha.enabled</name>
|
||||||
|
<value>false</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<description>The maximum number of completed applications RM keeps. </description>
|
<description>The maximum number of completed applications RM keeps. </description>
|
||||||
<name>yarn.resourcemanager.max-completed-applications</name>
|
<name>yarn.resourcemanager.max-completed-applications</name>
|
||||||
|
|
|
@ -196,7 +196,7 @@ public class ClientRMService extends AbstractService implements
|
||||||
|
|
||||||
ApplicationId getNewApplicationId() {
|
ApplicationId getNewApplicationId() {
|
||||||
ApplicationId applicationId = org.apache.hadoop.yarn.server.utils.BuilderUtils
|
ApplicationId applicationId = org.apache.hadoop.yarn.server.utils.BuilderUtils
|
||||||
.newApplicationId(recordFactory, ResourceManager.clusterTimeStamp,
|
.newApplicationId(recordFactory, ResourceManager.getClusterTimeStamp(),
|
||||||
applicationCounter.incrementAndGet());
|
applicationCounter.incrementAndGet());
|
||||||
LOG.info("Allocated new applicationId: " + applicationId.getId());
|
LOG.info("Allocated new applicationId: " + applicationId.getId());
|
||||||
return applicationId;
|
return applicationId;
|
||||||
|
|
|
@ -0,0 +1,153 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
|
import org.apache.hadoop.ha.HAServiceStatus;
|
||||||
|
import org.apache.hadoop.ha.HealthCheckFailedException;
|
||||||
|
import org.apache.hadoop.service.AbstractService;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
public class RMHAProtocolService extends AbstractService implements
|
||||||
|
HAServiceProtocol {
|
||||||
|
private static final Log LOG = LogFactory.getLog(RMHAProtocolService.class);
|
||||||
|
|
||||||
|
private Configuration conf;
|
||||||
|
private ResourceManager rm;
|
||||||
|
@VisibleForTesting
|
||||||
|
protected HAServiceState haState = HAServiceState.INITIALIZING;
|
||||||
|
|
||||||
|
public RMHAProtocolService(ResourceManager resourceManager) {
|
||||||
|
super("RMHAProtocolService");
|
||||||
|
this.rm = resourceManager;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void serviceInit(Configuration conf) throws Exception {
|
||||||
|
this.conf = conf;
|
||||||
|
rm.createAndInitActiveServices();
|
||||||
|
super.serviceInit(this.conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void serviceStart() throws Exception {
|
||||||
|
boolean haEnabled = this.conf.getBoolean(YarnConfiguration.RM_HA_ENABLED,
|
||||||
|
YarnConfiguration.DEFAULT_RM_HA_ENABLED);
|
||||||
|
|
||||||
|
if (haEnabled) {
|
||||||
|
transitionToStandby(true);
|
||||||
|
} else {
|
||||||
|
transitionToActive();
|
||||||
|
}
|
||||||
|
|
||||||
|
super.serviceStart();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void serviceStop() throws Exception {
|
||||||
|
transitionToStandby(false);
|
||||||
|
haState = HAServiceState.STOPPING;
|
||||||
|
super.serviceStop();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void monitorHealth() throws HealthCheckFailedException {
|
||||||
|
if (haState == HAServiceState.ACTIVE && !rm.areActiveServicesRunning()) {
|
||||||
|
throw new HealthCheckFailedException(
|
||||||
|
"Active ResourceManager services are not running!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void transitionToActive() throws Exception {
|
||||||
|
if (haState == HAServiceState.ACTIVE) {
|
||||||
|
LOG.info("Already in active state");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.info("Transitioning to active");
|
||||||
|
rm.startActiveServices();
|
||||||
|
haState = HAServiceState.ACTIVE;
|
||||||
|
LOG.info("Transitioned to active");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void transitionToActive(StateChangeRequestInfo reqInfo) {
|
||||||
|
// TODO (YARN-1177): When automatic failover is enabled,
|
||||||
|
// check if transition should be allowed for this request
|
||||||
|
try {
|
||||||
|
transitionToActive();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("Error when transitioning to Active mode", e);
|
||||||
|
throw new YarnRuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void transitionToStandby(boolean initialize)
|
||||||
|
throws Exception {
|
||||||
|
if (haState == HAServiceState.STANDBY) {
|
||||||
|
LOG.info("Already in standby state");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.info("Transitioning to standby");
|
||||||
|
if (haState == HAServiceState.ACTIVE) {
|
||||||
|
rm.stopActiveServices();
|
||||||
|
if (initialize) {
|
||||||
|
rm.createAndInitActiveServices();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
haState = HAServiceState.STANDBY;
|
||||||
|
LOG.info("Transitioned to standby");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void transitionToStandby(StateChangeRequestInfo reqInfo) {
|
||||||
|
// TODO (YARN-1177): When automatic failover is enabled,
|
||||||
|
// check if transition should be allowed for this request
|
||||||
|
try {
|
||||||
|
transitionToStandby(true);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("Error when transitioning to Standby mode", e);
|
||||||
|
throw new YarnRuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized HAServiceStatus getServiceStatus() throws IOException {
|
||||||
|
HAServiceStatus ret = new HAServiceStatus(haState);
|
||||||
|
if (haState == HAServiceState.ACTIVE || haState == HAServiceState.STANDBY) {
|
||||||
|
ret.setReadyToBecomeActive();
|
||||||
|
} else {
|
||||||
|
ret.setNotReadyToBecomeActive("State is " + haState);
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
|
@ -105,7 +105,14 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
public static final int SHUTDOWN_HOOK_PRIORITY = 30;
|
public static final int SHUTDOWN_HOOK_PRIORITY = 30;
|
||||||
|
|
||||||
private static final Log LOG = LogFactory.getLog(ResourceManager.class);
|
private static final Log LOG = LogFactory.getLog(ResourceManager.class);
|
||||||
public static final long clusterTimeStamp = System.currentTimeMillis();
|
private static long clusterTimeStamp = System.currentTimeMillis();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* "Always On" services. Services that need to run always irrespective of
|
||||||
|
* the HA state of the RM.
|
||||||
|
*/
|
||||||
|
@VisibleForTesting
|
||||||
|
protected RMHAProtocolService haService;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* "Active" services. Services that need to run only on the Active RM.
|
* "Active" services. Services that need to run only on the Active RM.
|
||||||
|
@ -156,13 +163,17 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
return this.rmContext;
|
return this.rmContext;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static long getClusterTimeStamp() {
|
||||||
|
return clusterTimeStamp;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void serviceInit(Configuration conf) throws Exception {
|
protected void serviceInit(Configuration conf) throws Exception {
|
||||||
validateConfigs(conf);
|
validateConfigs(conf);
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
|
|
||||||
activeServices = new RMActiveServices();
|
haService = new RMHAProtocolService(this);
|
||||||
addService(activeServices);
|
addService(haService);
|
||||||
super.serviceInit(conf);
|
super.serviceInit(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -470,6 +481,7 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
LOG.error("Error closing store.", e);
|
LOG.error("Error closing store.", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
super.serviceStop();
|
super.serviceStop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -708,6 +720,43 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
webApp = builder.start(new RMWebApp(this));
|
webApp = builder.start(new RMWebApp(this));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to create and init {@link #activeServices}. This creates an
|
||||||
|
* instance of {@link RMActiveServices} and initializes it.
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
void createAndInitActiveServices() throws Exception {
|
||||||
|
activeServices = new RMActiveServices();
|
||||||
|
activeServices.init(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to start {@link #activeServices}.
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
void startActiveServices() throws Exception {
|
||||||
|
if (activeServices != null) {
|
||||||
|
clusterTimeStamp = System.currentTimeMillis();
|
||||||
|
activeServices.start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to stop {@link #activeServices}.
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
void stopActiveServices() throws Exception {
|
||||||
|
if (activeServices != null) {
|
||||||
|
activeServices.stop();
|
||||||
|
activeServices = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
protected boolean areActiveServicesRunning() {
|
||||||
|
return activeServices != null && activeServices.isInState(STATE.STARTED);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void serviceStart() throws Exception {
|
protected void serviceStart() throws Exception {
|
||||||
try {
|
try {
|
||||||
|
@ -715,7 +764,6 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
} catch(IOException ie) {
|
} catch(IOException ie) {
|
||||||
throw new YarnRuntimeException("Failed to login", ie);
|
throw new YarnRuntimeException("Failed to login", ie);
|
||||||
}
|
}
|
||||||
|
|
||||||
super.serviceStart();
|
super.serviceStart();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -229,7 +229,7 @@ public class ResourceTrackerService extends AbstractService implements
|
||||||
+ ", assigned nodeId " + nodeId;
|
+ ", assigned nodeId " + nodeId;
|
||||||
LOG.info(message);
|
LOG.info(message);
|
||||||
response.setNodeAction(NodeAction.NORMAL);
|
response.setNodeAction(NodeAction.NORMAL);
|
||||||
response.setRMIdentifier(ResourceManager.clusterTimeStamp);
|
response.setRMIdentifier(ResourceManager.getClusterTimeStamp());
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -185,7 +185,7 @@ public class RMContainerTokenSecretManager extends
|
||||||
tokenIdentifier =
|
tokenIdentifier =
|
||||||
new ContainerTokenIdentifier(containerId, nodeId.toString(),
|
new ContainerTokenIdentifier(containerId, nodeId.toString(),
|
||||||
appSubmitter, capability, expiryTimeStamp, this.currentMasterKey
|
appSubmitter, capability, expiryTimeStamp, this.currentMasterKey
|
||||||
.getMasterKey().getKeyId(), ResourceManager.clusterTimeStamp);
|
.getMasterKey().getKeyId(), ResourceManager.getClusterTimeStamp());
|
||||||
password = this.createPassword(tokenIdentifier);
|
password = this.createPassword(tokenIdentifier);
|
||||||
|
|
||||||
} finally {
|
} finally {
|
||||||
|
|
|
@ -108,7 +108,7 @@ public class AppInfo {
|
||||||
this.diagnostics = "";
|
this.diagnostics = "";
|
||||||
}
|
}
|
||||||
this.finalStatus = app.getFinalApplicationStatus();
|
this.finalStatus = app.getFinalApplicationStatus();
|
||||||
this.clusterId = ResourceManager.clusterTimeStamp;
|
this.clusterId = ResourceManager.getClusterTimeStamp();
|
||||||
if (hasAccess) {
|
if (hasAccess) {
|
||||||
this.startedTime = app.getStartTime();
|
this.startedTime = app.getStartTime();
|
||||||
this.finishedTime = app.getFinishTime();
|
this.finishedTime = app.getFinishTime();
|
||||||
|
|
|
@ -44,7 +44,7 @@ public class ClusterInfo {
|
||||||
} // JAXB needs this
|
} // JAXB needs this
|
||||||
|
|
||||||
public ClusterInfo(ResourceManager rm) {
|
public ClusterInfo(ResourceManager rm) {
|
||||||
long ts = ResourceManager.clusterTimeStamp;
|
long ts = ResourceManager.getClusterTimeStamp();
|
||||||
|
|
||||||
this.id = ts;
|
this.id = ts;
|
||||||
this.state = rm.getServiceState();
|
this.state = rm.getServiceState();
|
||||||
|
|
|
@ -0,0 +1,151 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.ha.HAServiceProtocol;
|
||||||
|
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
|
||||||
|
import org.apache.hadoop.ha.HAServiceProtocol.StateChangeRequestInfo;
|
||||||
|
import org.apache.hadoop.ha.HealthCheckFailedException;
|
||||||
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertFalse;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
|
public class TestRMHA {
|
||||||
|
private Log LOG = LogFactory.getLog(TestRMHA.class);
|
||||||
|
private MockRM rm = null;
|
||||||
|
private static final String STATE_ERR =
|
||||||
|
"ResourceManager is in wrong HA state";
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
Configuration conf = new YarnConfiguration();
|
||||||
|
conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
|
||||||
|
rm = new MockRM(conf);
|
||||||
|
rm.init(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkMonitorHealth() {
|
||||||
|
try {
|
||||||
|
rm.haService.monitorHealth();
|
||||||
|
} catch (HealthCheckFailedException e) {
|
||||||
|
fail("The RM is in bad health: it is Active, but the active services " +
|
||||||
|
"are not running");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkStandbyRMFunctionality() throws IOException {
|
||||||
|
assertEquals(STATE_ERR, HAServiceState.STANDBY,
|
||||||
|
rm.haService.getServiceStatus().getState());
|
||||||
|
assertFalse("Active RM services are started",
|
||||||
|
rm.areActiveServicesRunning());
|
||||||
|
assertTrue("RM is not ready to become active",
|
||||||
|
rm.haService.getServiceStatus().isReadyToBecomeActive());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkActiveRMFunctionality() throws IOException {
|
||||||
|
assertEquals(STATE_ERR, HAServiceState.ACTIVE,
|
||||||
|
rm.haService.getServiceStatus().getState());
|
||||||
|
assertTrue("Active RM services aren't started",
|
||||||
|
rm.areActiveServicesRunning());
|
||||||
|
assertTrue("RM is not ready to become active",
|
||||||
|
rm.haService.getServiceStatus().isReadyToBecomeActive());
|
||||||
|
|
||||||
|
try {
|
||||||
|
rm.getNewAppId();
|
||||||
|
rm.registerNode("127.0.0.1:0", 2048);
|
||||||
|
rm.submitApp(1024);
|
||||||
|
} catch (Exception e) {
|
||||||
|
fail("Unable to perform Active RM functions");
|
||||||
|
LOG.error("ActiveRM check failed", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test to verify the following RM HA transitions to the following states.
|
||||||
|
* 1. Standby: Should be a no-op
|
||||||
|
* 2. Active: Active services should start
|
||||||
|
* 3. Active: Should be a no-op.
|
||||||
|
* While active, submit a couple of jobs
|
||||||
|
* 4. Standby: Active services should stop
|
||||||
|
* 5. Active: Active services should start
|
||||||
|
* 6. Stop the RM: All services should stop and RM should not be ready to
|
||||||
|
* become Active
|
||||||
|
*/
|
||||||
|
@Test (timeout = 30000)
|
||||||
|
public void testStartAndTransitions() throws IOException {
|
||||||
|
StateChangeRequestInfo requestInfo = new StateChangeRequestInfo(
|
||||||
|
HAServiceProtocol.RequestSource.REQUEST_BY_USER);
|
||||||
|
|
||||||
|
assertEquals(STATE_ERR, HAServiceState.INITIALIZING,
|
||||||
|
rm.haService.getServiceStatus().getState());
|
||||||
|
assertFalse("RM is ready to become active before being started",
|
||||||
|
rm.haService.getServiceStatus().isReadyToBecomeActive());
|
||||||
|
checkMonitorHealth();
|
||||||
|
|
||||||
|
rm.start();
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkStandbyRMFunctionality();
|
||||||
|
|
||||||
|
// 1. Transition to Standby - must be a no-op
|
||||||
|
rm.haService.transitionToStandby(requestInfo);
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkStandbyRMFunctionality();
|
||||||
|
|
||||||
|
// 2. Transition to active
|
||||||
|
rm.haService.transitionToActive(requestInfo);
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkActiveRMFunctionality();
|
||||||
|
|
||||||
|
// 3. Transition to active - no-op
|
||||||
|
rm.haService.transitionToActive(requestInfo);
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkActiveRMFunctionality();
|
||||||
|
|
||||||
|
// 4. Transition to standby
|
||||||
|
rm.haService.transitionToStandby(requestInfo);
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkStandbyRMFunctionality();
|
||||||
|
|
||||||
|
// 5. Transition to active to check Active->Standby->Active works
|
||||||
|
rm.haService.transitionToActive(requestInfo);
|
||||||
|
checkMonitorHealth();
|
||||||
|
checkActiveRMFunctionality();
|
||||||
|
|
||||||
|
// 6. Stop the RM. All services should stop and RM should not be ready to
|
||||||
|
// become active
|
||||||
|
rm.stop();
|
||||||
|
assertEquals(STATE_ERR, HAServiceState.STOPPING,
|
||||||
|
rm.haService.getServiceStatus().getState());
|
||||||
|
assertFalse("RM is ready to become active even after it is stopped",
|
||||||
|
rm.haService.getServiceStatus().isReadyToBecomeActive());
|
||||||
|
assertFalse("Active RM services are started",
|
||||||
|
rm.areActiveServicesRunning());
|
||||||
|
checkMonitorHealth();
|
||||||
|
}
|
||||||
|
}
|
|
@ -283,7 +283,7 @@ public class TestResourceTrackerService {
|
||||||
RegisterNodeManagerResponse response = nm.registerNode();
|
RegisterNodeManagerResponse response = nm.registerNode();
|
||||||
|
|
||||||
// Verify the RMIdentifier is correctly set in RegisterNodeManagerResponse
|
// Verify the RMIdentifier is correctly set in RegisterNodeManagerResponse
|
||||||
Assert.assertEquals(ResourceManager.clusterTimeStamp,
|
Assert.assertEquals(ResourceManager.getClusterTimeStamp(),
|
||||||
response.getRMIdentifier());
|
response.getRMIdentifier());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -88,7 +88,7 @@ public class TestApplicationMasterService {
|
||||||
ContainerTokenIdentifier tokenId =
|
ContainerTokenIdentifier tokenId =
|
||||||
BuilderUtils.newContainerTokenIdentifier(allocatedContainer
|
BuilderUtils.newContainerTokenIdentifier(allocatedContainer
|
||||||
.getContainerToken());
|
.getContainerToken());
|
||||||
Assert.assertEquals(MockRM.clusterTimeStamp, tokenId.getRMIdentifer());
|
Assert.assertEquals(MockRM.getClusterTimeStamp(), tokenId.getRMIdentifer());
|
||||||
rm.stop();
|
rm.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -295,10 +295,10 @@ public class TestRMWebServices extends JerseyTest {
|
||||||
String hadoopVersion, String resourceManagerVersionBuiltOn,
|
String hadoopVersion, String resourceManagerVersionBuiltOn,
|
||||||
String resourceManagerBuildVersion, String resourceManagerVersion) {
|
String resourceManagerBuildVersion, String resourceManagerVersion) {
|
||||||
|
|
||||||
assertEquals("clusterId doesn't match: ", ResourceManager.clusterTimeStamp,
|
assertEquals("clusterId doesn't match: ",
|
||||||
clusterid);
|
ResourceManager.getClusterTimeStamp(), clusterid);
|
||||||
assertEquals("startedOn doesn't match: ", ResourceManager.clusterTimeStamp,
|
assertEquals("startedOn doesn't match: ",
|
||||||
startedon);
|
ResourceManager.getClusterTimeStamp(), startedon);
|
||||||
assertTrue("stated doesn't match: " + state,
|
assertTrue("stated doesn't match: " + state,
|
||||||
state.matches(STATE.INITED.toString()));
|
state.matches(STATE.INITED.toString()));
|
||||||
|
|
||||||
|
|
|
@ -1181,8 +1181,8 @@ public class TestRMWebServicesApps extends JerseyTest {
|
||||||
trackingUI);
|
trackingUI);
|
||||||
WebServicesTestUtils.checkStringMatch("diagnostics", app.getDiagnostics()
|
WebServicesTestUtils.checkStringMatch("diagnostics", app.getDiagnostics()
|
||||||
.toString(), diagnostics);
|
.toString(), diagnostics);
|
||||||
assertEquals("clusterId doesn't match", ResourceManager.clusterTimeStamp,
|
assertEquals("clusterId doesn't match",
|
||||||
clusterId);
|
ResourceManager.getClusterTimeStamp(), clusterId);
|
||||||
assertEquals("startedTime doesn't match", app.getStartTime(), startedTime);
|
assertEquals("startedTime doesn't match", app.getStartTime(), startedTime);
|
||||||
assertEquals("finishedTime doesn't match", app.getFinishTime(),
|
assertEquals("finishedTime doesn't match", app.getFinishTime(),
|
||||||
finishedTime);
|
finishedTime);
|
||||||
|
|
Loading…
Reference in New Issue