From f56e36935da69b5b12086d80bfe1cdb446213882 Mon Sep 17 00:00:00 2001 From: Weiwei Yang Date: Wed, 19 Sep 2018 19:31:07 +0800 Subject: [PATCH] YARN-8771. CapacityScheduler fails to unreserve when cluster resource contains empty resource type. Contributed by Tao Yang. (cherry picked from commit 0712537e799bc03855d548d1f4bd690dd478b871) --- .../allocator/RegularContainerAllocator.java | 3 +- .../capacity/TestContainerAllocation.java | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index 69e90c68a43..c6b3ca9bfd4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -530,8 +530,7 @@ public class RegularContainerAllocator extends AbstractContainerAllocator { currentResoureLimits.getAmountNeededUnreserve()); boolean needToUnreserve = - Resources.greaterThan(rc, clusterResource, - resourceNeedToUnReserve, Resources.none()); + rc.isAnyMajorResourceAboveZero(resourceNeedToUnReserve); RMContainer unreservedContainer = null; boolean reservationsContinueLooking = diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index 48a0f3e9f54..f373f1573bd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl; import org.apache.hadoop.yarn.server.resourcemanager.RMSecretManagerService; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.resource.TestResourceProfiles; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -58,7 +59,10 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemoved import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import 
org.apache.hadoop.yarn.util.resource.Resources; +import org.apache.hadoop.yarn.util.resource.TestResourceUtils; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -982,4 +986,68 @@ public class TestContainerAllocation { Assert.assertEquals(2, lq.getMetrics().getAppsPending()); rm1.close(); } + + @Test(timeout = 60000) + public void testUnreserveWhenClusterResourceHasEmptyResourceType() + throws Exception { + /** + * Test case: + * Create a cluster with two nodes, each with node resource + * <8GB, 8core, 0>; create queue "a" whose max-resource is <8GB, 8core, 0>; + * submit app1 to queue "a"; its AM uses <1GB, 1core, 0> and runs on nm1, + * submit app2 to queue "b"; its AM uses <1GB, 1core, 0> and runs on nm1, + * app1 asks for two <7GB, 1core> containers and nm1 sends one heartbeat, + * after which the scheduler reserves one container on nm1. + * + * After nm2's next heartbeat, the scheduler should unreserve the reserved + * container on nm1 and then allocate a container on nm2. 
+ */ + TestResourceUtils.addNewTypesToResources("resource1"); + CapacitySchedulerConfiguration newConf = + (CapacitySchedulerConfiguration) TestUtils + .getConfigurationWithMultipleQueues(conf); + newConf.setClass(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, + DominantResourceCalculator.class, ResourceCalculator.class); + newConf + .setBoolean(TestResourceProfiles.TEST_CONF_RESET_RESOURCE_TYPES, false); + // Set maximum capacity of queue "a" to 50 + newConf.setMaximumCapacity(CapacitySchedulerConfiguration.ROOT + ".a", 50); + MockRM rm1 = new MockRM(newConf); + + RMNodeLabelsManager nodeLabelsManager = new NullRMNodeLabelsManager(); + nodeLabelsManager.init(newConf); + rm1.getRMContext().setNodeLabelManager(nodeLabelsManager); + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // Launch an app in queue "a"; its AM container should launch on nm1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "a"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + // Launch another app in queue "b"; its AM container should launch on nm1 + RMApp app2 = rm1.submitApp(1 * GB, "app", "user", null, "b"); + MockRM.launchAndRegisterAM(app2, rm1, nm1); + + am1.allocate("*", 7 * GB, 2, new ArrayList()); + + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + RMNode rmNode2 = rm1.getRMContext().getRMNodes().get(nm2.getNodeId()); + FiCaSchedulerApp schedulerApp1 = + cs.getApplicationAttempt(am1.getApplicationAttemptId()); + + // One nm1 heartbeat: the scheduler reserves a container on nm1 for app1 + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + Assert.assertEquals(1, schedulerApp1.getLiveContainers().size()); + Assert.assertEquals(1, schedulerApp1.getReservedContainers().size()); + + // One nm2 heartbeat: the scheduler unreserves the container on nm1 + // and allocates a container on nm2 for 
app1 + cs.handle(new NodeUpdateSchedulerEvent(rmNode2)); + Assert.assertEquals(2, schedulerApp1.getLiveContainers().size()); + Assert.assertEquals(0, schedulerApp1.getReservedContainers().size()); + + rm1.close(); + } }