YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1579015 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b18aaa79f5
commit
d33b320214
|
@ -158,6 +158,8 @@ Release 2.4.0 - UNRELEASED
|
||||||
YARN-1846. TestRM#testNMTokenSentForNormalContainer assumes CapacityScheduler.
|
YARN-1846. TestRM#testNMTokenSentForNormalContainer assumes CapacityScheduler.
|
||||||
(Robert Kanter via kasha)
|
(Robert Kanter via kasha)
|
||||||
|
|
||||||
|
YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via
|
YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via
|
||||||
|
|
|
@ -81,6 +81,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEventType;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerPreemptEventType;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
|
||||||
|
@ -840,6 +841,8 @@ public class ResourceManager extends CompositeService implements Recoverable {
|
||||||
rmContext.getRMNodes().clear();
|
rmContext.getRMNodes().clear();
|
||||||
rmContext.getInactiveRMNodes().clear();
|
rmContext.getInactiveRMNodes().clear();
|
||||||
rmContext.getRMApps().clear();
|
rmContext.getRMApps().clear();
|
||||||
|
ClusterMetrics.destroy();
|
||||||
|
QueueMetrics.clearQueueMetrics();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -127,7 +127,7 @@ public class QueueMetrics implements MetricsSource {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method to clear cache - used only for unit tests.
|
* Helper method to clear cache.
|
||||||
*/
|
*/
|
||||||
@Private
|
@Private
|
||||||
public synchronized static void clearQueueMetrics() {
|
public synchronized static void clearQueueMetrics() {
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.conf.HAUtil;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -138,32 +139,38 @@ public class TestRMHA {
|
||||||
rm.start();
|
rm.start();
|
||||||
checkMonitorHealth();
|
checkMonitorHealth();
|
||||||
checkStandbyRMFunctionality();
|
checkStandbyRMFunctionality();
|
||||||
|
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
// 1. Transition to Standby - must be a no-op
|
// 1. Transition to Standby - must be a no-op
|
||||||
rm.adminService.transitionToStandby(requestInfo);
|
rm.adminService.transitionToStandby(requestInfo);
|
||||||
checkMonitorHealth();
|
checkMonitorHealth();
|
||||||
checkStandbyRMFunctionality();
|
checkStandbyRMFunctionality();
|
||||||
|
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
// 2. Transition to active
|
// 2. Transition to active
|
||||||
rm.adminService.transitionToActive(requestInfo);
|
rm.adminService.transitionToActive(requestInfo);
|
||||||
checkMonitorHealth();
|
checkMonitorHealth();
|
||||||
checkActiveRMFunctionality();
|
checkActiveRMFunctionality();
|
||||||
|
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
|
||||||
|
|
||||||
// 3. Transition to active - no-op
|
// 3. Transition to active - no-op
|
||||||
rm.adminService.transitionToActive(requestInfo);
|
rm.adminService.transitionToActive(requestInfo);
|
||||||
checkMonitorHealth();
|
checkMonitorHealth();
|
||||||
checkActiveRMFunctionality();
|
checkActiveRMFunctionality();
|
||||||
|
verifyClusterMetrics(1, 2, 2, 2, 2048, 2);
|
||||||
|
|
||||||
// 4. Transition to standby
|
// 4. Transition to standby
|
||||||
rm.adminService.transitionToStandby(requestInfo);
|
rm.adminService.transitionToStandby(requestInfo);
|
||||||
checkMonitorHealth();
|
checkMonitorHealth();
|
||||||
checkStandbyRMFunctionality();
|
checkStandbyRMFunctionality();
|
||||||
|
verifyClusterMetrics(0, 0, 0, 0, 0, 0);
|
||||||
|
|
||||||
// 5. Transition to active to check Active->Standby->Active works
|
// 5. Transition to active to check Active->Standby->Active works
|
||||||
rm.adminService.transitionToActive(requestInfo);
|
rm.adminService.transitionToActive(requestInfo);
|
||||||
checkMonitorHealth();
|
checkMonitorHealth();
|
||||||
checkActiveRMFunctionality();
|
checkActiveRMFunctionality();
|
||||||
|
verifyClusterMetrics(1, 1, 1, 1, 2048, 1);
|
||||||
|
|
||||||
// 6. Stop the RM. All services should stop and RM should not be ready to
|
// 6. Stop the RM. All services should stop and RM should not be ready to
|
||||||
// become active
|
// become active
|
||||||
rm.stop();
|
rm.stop();
|
||||||
|
@ -367,6 +374,27 @@ public class TestRMHA {
|
||||||
fail("Should not throw any exceptions.");
|
fail("Should not throw any exceptions.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Asserts that the scheduler's root queue metrics and the cluster-wide
 * active-node count currently match the expected values.
 *
 * @param activeNodes expected result of ClusterMetrics.getNumActiveNMs()
 * @param appsSubmitted expected result of getAppsSubmitted() on the root queue
 * @param appsPending expected result of getAppsPending() on the root queue
 * @param containersPending expected result of getPendingContainers() on the
 *          root queue
 * @param availableMB expected result of getAvailableMB() on the root queue
 * @param activeApplications expected result of getActiveApps() on the root
 *          queue
 */
private void verifyClusterMetrics(int activeNodes, int appsSubmitted,
|
||||||
|
int appsPending, int containersPending, int availableMB,
|
||||||
|
int activeApplications) {
|
||||||
|
QueueMetrics metrics = rm.getResourceScheduler().getRootQueueMetrics();
|
||||||
|
// Verify the root queue metrics obtained from the RM's scheduler.
|
||||||
|
assertMetric("appsSubmitted", appsSubmitted, metrics.getAppsSubmitted());
|
||||||
|
assertMetric("appsPending", appsPending, metrics.getAppsPending());
|
||||||
|
assertMetric("containersPending", containersPending,
|
||||||
|
metrics.getPendingContainers());
|
||||||
|
assertMetric("availableMB", availableMB, metrics.getAvailableMB());
|
||||||
|
assertMetric("activeApplications", activeApplications,
|
||||||
|
metrics.getActiveApps());
|
||||||
|
// Verify the node metric: active NodeManager count from ClusterMetrics.
|
||||||
|
ClusterMetrics clusterMetrics = ClusterMetrics.getMetrics();
|
||||||
|
assertMetric("activeNodes", activeNodes, clusterMetrics.getNumActiveNMs());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Asserts that a single metric has the expected value, naming the metric in
 * the failure message.
 *
 * @param metricName name appended to the assertion failure message
 * @param expected the value the metric should have
 * @param actual the value read from the metrics object
 */
private void assertMetric(String metricName, int expected, int actual) {
|
||||||
|
assertEquals("Incorrect value for metric " + metricName, expected, actual);
|
||||||
|
}
|
||||||
|
|
||||||
@SuppressWarnings("rawtypes")
|
@SuppressWarnings("rawtypes")
|
||||||
class MyCountingDispatcher extends AbstractService implements Dispatcher {
|
class MyCountingDispatcher extends AbstractService implements Dispatcher {
|
||||||
|
|
Loading…
Reference in New Issue