YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1579015 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Karthik Kambatla 2014-03-18 19:03:24 +00:00
parent b18aaa79f5
commit d33b320214
4 changed files with 40 additions and 7 deletions

View File

@ -158,6 +158,8 @@ Release 2.4.0 - UNRELEASED
YARN-1846. TestRM#testNMTokenSentForNormalContainer assumes CapacityScheduler. YARN-1846. TestRM#testNMTokenSentForNormalContainer assumes CapacityScheduler.
(Robert Kanter via kasha) (Robert Kanter via kasha)
YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)
IMPROVEMENTS IMPROVEMENTS
YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via

View File

@ -81,6 +81,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEventType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEventType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@ -840,6 +841,8 @@ public class ResourceManager extends CompositeService implements Recoverable {
rmContext.getRMNodes().clear(); rmContext.getRMNodes().clear();
rmContext.getInactiveRMNodes().clear(); rmContext.getInactiveRMNodes().clear();
rmContext.getRMApps().clear(); rmContext.getRMApps().clear();
ClusterMetrics.destroy();
QueueMetrics.clearQueueMetrics();
} }
} }

View File

@ -127,7 +127,7 @@ public class QueueMetrics implements MetricsSource {
} }
/** /**
* Helper method to clear cache - used only for unit tests. * Helper method to clear cache.
*/ */
@Private @Private
public synchronized static void clearQueueMetrics() { public synchronized static void clearQueueMetrics() {

View File

@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.conf.HAUtil;
import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.junit.Before; import org.junit.Before;
import org.junit.Test; import org.junit.Test;
@ -138,32 +139,38 @@ public class TestRMHA {
rm.start(); rm.start();
checkMonitorHealth(); checkMonitorHealth();
checkStandbyRMFunctionality(); checkStandbyRMFunctionality();
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
// 1. Transition to Standby - must be a no-op // 1. Transition to Standby - must be a no-op
rm.adminService.transitionToStandby(requestInfo); rm.adminService.transitionToStandby(requestInfo);
checkMonitorHealth(); checkMonitorHealth();
checkStandbyRMFunctionality(); checkStandbyRMFunctionality();
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
// 2. Transition to active // 2. Transition to active
rm.adminService.transitionToActive(requestInfo); rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth(); checkMonitorHealth();
checkActiveRMFunctionality(); checkActiveRMFunctionality();
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
// 3. Transition to active - no-op // 3. Transition to active - no-op
rm.adminService.transitionToActive(requestInfo); rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth(); checkMonitorHealth();
checkActiveRMFunctionality(); checkActiveRMFunctionality();
verifyClusterMetrics(1, 2, 2, 2, 2048, 2);
// 4. Transition to standby // 4. Transition to standby
rm.adminService.transitionToStandby(requestInfo); rm.adminService.transitionToStandby(requestInfo);
checkMonitorHealth(); checkMonitorHealth();
checkStandbyRMFunctionality(); checkStandbyRMFunctionality();
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
// 5. Transition to active to check Active->Standby->Active works // 5. Transition to active to check Active->Standby->Active works
rm.adminService.transitionToActive(requestInfo); rm.adminService.transitionToActive(requestInfo);
checkMonitorHealth(); checkMonitorHealth();
checkActiveRMFunctionality(); checkActiveRMFunctionality();
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
// 6. Stop the RM. All services should stop and RM should not be ready to // 6. Stop the RM. All services should stop and RM should not be ready to
// become active // become active
rm.stop(); rm.stop();
@ -367,6 +374,27 @@ public class TestRMHA {
fail("Should not throw any exceptions."); fail("Should not throw any exceptions.");
} }
} }
/**
 * Asserts that the root queue metrics reported by the RM's scheduler and the
 * active NodeManager count reported by ClusterMetrics match the expected
 * values.
 *
 * @param activeNodes expected value of ClusterMetrics#getNumActiveNMs
 * @param appsSubmitted expected value of QueueMetrics#getAppsSubmitted
 * @param appsPending expected value of QueueMetrics#getAppsPending
 * @param containersPending expected value of QueueMetrics#getPendingContainers
 * @param availableMB expected value of QueueMetrics#getAvailableMB
 * @param activeApplications expected value of QueueMetrics#getActiveApps
 */
private void verifyClusterMetrics(int activeNodes, int appsSubmitted,
    int appsPending, int containersPending, int availableMB,
    int activeApplications) {
  // Queue-level counters are read from the scheduler's root queue.
  final QueueMetrics rootQueueMetrics =
      rm.getResourceScheduler().getRootQueueMetrics();
  assertMetric("appsSubmitted", appsSubmitted,
      rootQueueMetrics.getAppsSubmitted());
  assertMetric("appsPending", appsPending,
      rootQueueMetrics.getAppsPending());
  assertMetric("containersPending", containersPending,
      rootQueueMetrics.getPendingContainers());
  assertMetric("availableMB", availableMB,
      rootQueueMetrics.getAvailableMB());
  assertMetric("activeApplications", activeApplications,
      rootQueueMetrics.getActiveApps());

  // Node-level counter is checked last, after all queue metrics passed.
  final ClusterMetrics nodeLevelMetrics = ClusterMetrics.getMetrics();
  assertMetric("activeNodes", activeNodes,
      nodeLevelMetrics.getNumActiveNMs());
}
/**
 * Asserts that a single metric has the expected value, naming the metric in
 * the failure message.
 *
 * @param metricName name of the metric, appended to the failure message
 * @param expected the value the metric should have
 * @param actual the value the metric currently reports
 */
private void assertMetric(String metricName, int expected, int actual) {
  final String failureMessage = "Incorrect value for metric " + metricName;
  assertEquals(failureMessage, expected, actual);
}
@SuppressWarnings("rawtypes") @SuppressWarnings("rawtypes")
class MyCountingDispatcher extends AbstractService implements Dispatcher { class MyCountingDispatcher extends AbstractService implements Dispatcher {