YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1579015 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b18aaa79f5
commit
d33b320214
|
@ -158,6 +158,8 @@ Release 2.4.0 - UNRELEASED
|
|||
YARN-1846. TestRM#testNMTokenSentForNormalContainer assumes CapacityScheduler.
|
||||
(Robert Kanter via kasha)
|
||||
|
||||
YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)
|
||||
|
||||
IMPROVEMENTS
|
||||
|
||||
YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via
|
||||
|
|
|
@ -81,6 +81,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEventType;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
|
||||
|
@ -840,6 +841,8 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
|||
rmContext.getRMNodes().clear();
|
||||
rmContext.getInactiveRMNodes().clear();
|
||||
rmContext.getRMApps().clear();
|
||||
ClusterMetrics.destroy();
|
||||
QueueMetrics.clearQueueMetrics();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -127,7 +127,7 @@ public class QueueMetrics implements MetricsSource {
|
|||
}
|
||||
|
||||
/**
|
||||
* Helper method to clear cache - used only for unit tests.
|
||||
* Helper method to clear cache.
|
||||
*/
|
||||
@Private
|
||||
public synchronized static void clearQueueMetrics() {
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.conf.HAUtil;
|
|||
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||
import org.apache.hadoop.yarn.event.EventHandler;
|
||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -138,32 +139,38 @@ public class TestRMHA {
|
|||
rm.start();
|
||||
checkMonitorHealth();
|
||||
checkStandbyRMFunctionality();
|
||||
|
||||
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
|
||||
|
||||
// 1. Transition to Standby - must be a no-op
|
||||
rm.adminService.transitionToStandby(requestInfo);
|
||||
checkMonitorHealth();
|
||||
checkStandbyRMFunctionality();
|
||||
|
||||
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
|
||||
|
||||
// 2. Transition to active
|
||||
rm.adminService.transitionToActive(requestInfo);
|
||||
checkMonitorHealth();
|
||||
checkActiveRMFunctionality();
|
||||
|
||||
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
|
||||
|
||||
// 3. Transition to active - no-op
|
||||
rm.adminService.transitionToActive(requestInfo);
|
||||
checkMonitorHealth();
|
||||
checkActiveRMFunctionality();
|
||||
|
||||
verifyClusterMetrics(1, 2, 2, 2, 2048, 2);
|
||||
|
||||
// 4. Transition to standby
|
||||
rm.adminService.transitionToStandby(requestInfo);
|
||||
checkMonitorHealth();
|
||||
checkStandbyRMFunctionality();
|
||||
|
||||
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
|
||||
|
||||
// 5. Transition to active to check Active->Standby->Active works
|
||||
rm.adminService.transitionToActive(requestInfo);
|
||||
checkMonitorHealth();
|
||||
checkActiveRMFunctionality();
|
||||
|
||||
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
|
||||
|
||||
// 6. Stop the RM. All services should stop and RM should not be ready to
|
||||
// become active
|
||||
rm.stop();
|
||||
|
@ -367,6 +374,27 @@ public class TestRMHA {
|
|||
fail("Should not throw any exceptions.");
|
||||
}
|
||||
}
|
||||
|
||||
private void verifyClusterMetrics(int activeNodes, int appsSubmitted,
|
||||
int appsPending, int containersPending, int availableMB,
|
||||
int activeApplications) {
|
||||
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
|
||||
// verify queue metrics
|
||||
assertMetric("appsSubmitted", appsSubmitted, metrics.getAppsSubmitted());
|
||||
assertMetric("appsPending", appsPending, metrics.getAppsPending());
|
||||
assertMetric("containersPending", containersPending,
|
||||
metrics.getPendingContainers());
|
||||
assertMetric("availableMB", availableMB, metrics.getAvailableMB());
|
||||
assertMetric("activeApplications", activeApplications,
|
||||
metrics.getActiveApps());
|
||||
// verify node metric
|
||||
ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();
|
||||
assertMetric("activeNodes", activeNodes, clusterMetrics.getNumActiveNMs());
|
||||
}
|
||||
|
||||
private void assertMetric(String metricName, int expected, int actual) {
|
||||
assertEquals("Incorrect value for metric " + metricName, expected, actual);
|
||||
}
|
||||
|
||||
@SuppressWarnings("rawtypes")
|
||||
class MyCountingDispatcher extends AbstractService implements Dispatcher {
|
||||
|
|
Loading…
Reference in New Issue