From 1c8ab41e8b3477a93cbdf0b553a87b131eb60e1f Mon Sep 17 00:00:00 2001 From: Varun Saxena Date: Sat, 29 Oct 2016 13:47:39 +0530 Subject: [PATCH] YARN-5773. RM recovery too slow due to LeafQueue#activateApplications (Bibin A Chundatt via Varun Saxena) --- ...AMContainerLaunchDiagnosticsConstants.java | 2 ++ .../scheduler/capacity/LeafQueue.java | 28 +++++++++++++------ .../resourcemanager/TestClientRMService.java | 2 +- .../capacity/TestApplicationPriority.java | 12 ++++---- .../capacity/TestCapacityScheduler.java | 5 +++- 5 files changed, 34 insertions(+), 15 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAMContainerLaunchDiagnosticsConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAMContainerLaunchDiagnosticsConstants.java index 4cc062c5fb7..6de2134e8c6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAMContainerLaunchDiagnosticsConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAMContainerLaunchDiagnosticsConstants.java @@ -32,4 +32,6 @@ public interface CSAMContainerLaunchDiagnosticsConstants { String USER_AM_RESOURCE_LIMIT_EXCEED = "User's AM resource limit exceeded. "; String LAST_NODE_PROCESSED_MSG = " Last Node which was processed for the application : "; + String CLUSTER_RESOURCE_EMPTY = + "Skipping AM assignment as cluster resource is empty. "; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index eecd4ba4a3d..3c519617963 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -752,9 +752,11 @@ public class LeafQueue extends AbstractCSQueue { } else{ application.updateAMContainerDiagnostics(AMState.INACTIVATED, CSAMContainerLaunchDiagnosticsConstants.QUEUE_AM_RESOURCE_LIMIT_EXCEED); - LOG.info("Not activating application " + applicationId - + " as amIfStarted: " + amIfStarted + " exceeds amLimit: " - + amLimit); + if (LOG.isDebugEnabled()) { + LOG.debug("Not activating application " + applicationId + + " as amIfStarted: " + amIfStarted + " exceeds amLimit: " + + amLimit); + } continue; } } @@ -785,10 +787,11 @@ public class LeafQueue extends AbstractCSQueue { } else{ application.updateAMContainerDiagnostics(AMState.INACTIVATED, CSAMContainerLaunchDiagnosticsConstants.USER_AM_RESOURCE_LIMIT_EXCEED); - LOG.info( - "Not activating application " + applicationId + " for user: " - + user + " as userAmIfStarted: " + userAmIfStarted - + " exceeds userAmLimit: " + userAMLimit); + if (LOG.isDebugEnabled()) { + LOG.debug("Not activating application " + applicationId + + " for user: " + user + " as userAmIfStarted: " + + userAmIfStarted + " exceeds userAmLimit: " + userAMLimit); + } continue; } } @@ -824,7 +827,16 @@ public class LeafQueue extends AbstractCSQueue { application); // Activate applications - activateApplications(); + if (Resources.greaterThan(resourceCalculator, lastClusterResource, + lastClusterResource, Resources.none())) { + activateApplications(); + } else { + application.updateAMContainerDiagnostics(AMState.INACTIVATED, + CSAMContainerLaunchDiagnosticsConstants.CLUSTER_RESOURCE_EMPTY); + LOG.info("Skipping activateApplications for " + + application.getApplicationAttemptId() + + " since cluster resource is " + Resources.none()); + } LOG.info( "Application added -" + " appId: " + application.getApplicationId() diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index cee9086514a..706c274ddf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -1651,7 +1651,7 @@ public class TestClientRMService { MockRM rm = new MockRM(conf); rm.init(conf); rm.start(); - + rm.registerNode("host1:1234", 1024); // Start app1 with appPriority 5 RMApp app1 = rm.submitApp(1024, Priority.newInstance(appPriority)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java index d862c75ea46..8bd531452c9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestApplicationPriority.java @@ -693,16 +693,18 @@ public class TestApplicationPriority { Thread.sleep(500); } - // Before NM registration, AMResourceLimit threshold is 0. So 1st - // applications get activated nevertheless of AMResourceLimit threshold - // Two applications are in pending - Assert.assertEquals(1, defaultQueue.getNumActiveApplications()); - Assert.assertEquals(2, defaultQueue.getNumPendingApplications()); + // Before NM registration, AMResourceLimit threshold is 0. So no + // applications get activated. + Assert.assertEquals(0, defaultQueue.getNumActiveApplications()); // NM resync to new RM nm1.registerNode(); dispatcher1.await(); + Assert.assertEquals(2, defaultQueue.getNumActiveApplications()); + Assert.assertEquals(1, defaultQueue.getNumPendingApplications()); + + // wait for activating one applications count = 5; while (count-- > 0) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 6bcf949d74f..865449f41ce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -2916,12 +2916,14 @@ public class TestCapacityScheduler { @Test(timeout = 30000) public void testAMUsedResource() throws Exception { MockRM rm = setUpMove(); + rm.registerNode("127.0.0.1:1234", 4 * GB); + Configuration conf = rm.getConfig(); int minAllocMb = conf.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB); int amMemory = 50; - assertTrue("AM memory is greater than or equql to minAllocation", + assertTrue("AM memory is greater than or equal to minAllocation", amMemory < minAllocMb); Resource minAllocResource = Resource.newInstance(minAllocMb, 1); String queueName = "a1"; @@ -3270,6 +3272,7 @@ public class TestCapacityScheduler { private void verifyAMLimitForLeafQueue(CapacitySchedulerConfiguration config) throws Exception { MockRM rm = setUpMove(config); + rm.registerNode("127.0.0.1:1234", 2 * GB); String queueName = "a1"; String userName = "user_0";