YARN-6218. Fix TestAMRMClient when using FairScheduler. (Miklos Szegedi via rchiang)

(cherry picked from commit 30b91ff9540e35195af834d1bd5983114a556c6d)
2017-03-03 12:55:45 -08:00 · 2017-03-03 12:55:45 -08:00 · 303ee13e3c
commit 303ee13e3c
parent 5eca427da2
2 changed files with 87 additions and 55 deletions
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java
@ -86,47 +86,71 @@
 import org.apache.hadoop.yarn.ipc.YarnRPC;
 import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
 import org.apache.hadoop.yarn.server.MiniYARNCluster;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.security.AMRMTokenSecretManager;
 import org.apache.hadoop.yarn.server.utils.BuilderUtils;
 import org.apache.hadoop.yarn.util.Records;
 import org.junit.After;
-import org.junit.AfterClass;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
-import org.junit.BeforeClass;
 import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
 import org.mockito.invocation.InvocationOnMock;
 import org.mockito.stubbing.Answer;
 import org.mortbay.log.Log;

 import com.google.common.base.Supplier;

+/**
+ * Test application master client class to resource manager.
+ */
+@RunWith(value = Parameterized.class)
 public class TestAMRMClient {
-  static Configuration conf = null;
-  static MiniYARNCluster yarnCluster = null;
-  static YarnClient yarnClient = null;
-  static List<NodeReport> nodeReports = null;
-  static ApplicationAttemptId attemptId = null;
-  static int nodeCount = 3;
+  private String schedulerName = null;
+  private Configuration conf = null;
+  private MiniYARNCluster yarnCluster = null;
+  private YarnClient yarnClient = null;
+  private List<NodeReport> nodeReports = null;
+  private ApplicationAttemptId attemptId = null;
+  private int nodeCount = 3;
  
  static final int rolling_interval_sec = 13;
  static final long am_expire_ms = 4000;

-  static Resource capability;
-  static Priority priority;
-  static Priority priority2;
-  static String node;
-  static String rack;
-  static String[] nodes;
-  static String[] racks;
+  private Resource capability;
+  private Priority priority;
+  private Priority priority2;
+  private String node;
+  private String rack;
+  private String[] nodes;
+  private String[] racks;
  private final static int DEFAULT_ITERATION = 3;

-  @BeforeClass
-  public static void setup() throws Exception {
+  public TestAMRMClient(String schedulerName) { // receives one data() entry
+    this.schedulerName = schedulerName; // applied to RM_SCHEDULER in setup()
+  }
+
+  @Parameterized.Parameters
+  public static Collection<Object[]> data() { // one scheduler class per run
+    List<Object[]> list = new ArrayList<Object[]>(2);
+    list.add(new Object[] {CapacityScheduler.class.getName()});
+    list.add(new Object[] {FairScheduler.class.getName()});
+    return list; // each Object[] holds the single constructor argument
+  }
+
+  @Before
+  public void setup() throws Exception {
    // start minicluster
    conf = new YarnConfiguration();
+    conf.set(YarnConfiguration.RM_SCHEDULER, schedulerName);
    conf.setLong(
      YarnConfiguration.RM_AMRM_TOKEN_MASTER_KEY_ROLLING_INTERVAL_SECS,
      rolling_interval_sec);
@ -160,10 +184,7 @@ public static void setup() throws Exception {
    rack = nodeReports.get(0).getRackName();
    nodes = new String[]{ node };
    racks = new String[]{ rack };
-  }
-  
-  @Before
-  public void startApp() throws Exception {
+
    // submit new app
    ApplicationSubmissionContext appContext = 
        yarnClient.createApplication().getApplicationSubmissionContext();
@ -221,13 +242,10 @@ Collections.<String, LocalResource> emptyMap(),
  }
  
  @After
-  public void cancelApp() throws YarnException, IOException {
+  public void teardown() throws YarnException, IOException {
    yarnClient.killApplication(attemptId.getApplicationId());
    attemptId = null;
-  }
-  
-  @AfterClass
-  public static void tearDown() {
+
    if (yarnClient != null && yarnClient.getServiceState() == STATE.STARTED) {
      yarnClient.stop();
    }
@ -663,8 +681,8 @@ public void testAMRMClientMatchStorage() throws YarnException, IOException {
          amClient.releaseAssignedContainer(container.getId());
        }
        if(allocatedContainerCount < containersRequestedAny) {
-          // sleep to let NM's heartbeat to RM and trigger allocations
-          sleep(100);
+          // let NM heartbeat to RM and trigger allocations
+          triggerSchedulingWithNMHeartBeat();
        }
      }
      
@ -685,7 +703,27 @@ public void testAMRMClientMatchStorage() throws YarnException, IOException {
      }
    }
  }
-  
+
+  /**
+   * Make sure we get allocations regardless of timing issues.
+   *
+   * <p>Looks up the RMContext of the mini cluster's ResourceManager and hands
+   * a NodeUpdateSchedulerEvent for every RMNode in that context directly to
+   * the scheduler. When the scheduler is a FairScheduler, its update() method
+   * is additionally called once before and once after the node updates.
+   * For any other scheduler only the node update events are sent.
+   */
+  private void triggerSchedulingWithNMHeartBeat() {
+    // Simulate the fair scheduler update thread by calling update() directly
+    RMContext context = yarnCluster.getResourceManager().getRMContext();
+    if (context.getScheduler() instanceof FairScheduler) {
+      FairScheduler scheduler = (FairScheduler)context.getScheduler();
+      scheduler.update();
+    }
+    // Simulate an NM heartbeat to RM for each node to trigger allocations
+    for (RMNode rmNode : context.getRMNodes().values()) {
+      context.getScheduler().handle(new NodeUpdateSchedulerEvent(rmNode));
+    }
+    if (context.getScheduler() instanceof FairScheduler) { // update again
+      FairScheduler scheduler = (FairScheduler)context.getScheduler();
+      scheduler.update();
+    }
+  }
+
  @Test (timeout=60000)
  public void testAllocationWithBlacklist() throws YarnException, IOException {
    AMRMClientImpl<ContainerRequest> amClient = null;
@ -817,8 +855,8 @@ private int getAllocatedContainersNumber(
      allocatedContainerCount += allocResponse.getAllocatedContainers().size();
        
      if(allocatedContainerCount == 0) {
-        // sleep to let NM's heartbeat to RM and trigger allocations
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
      }
    }
    return allocatedContainerCount;
@ -940,6 +978,8 @@ public void testAskWithInvalidNodeLabels() {
  @Test(timeout=60000)
  public void testAMRMClientWithContainerResourceChange()
      throws YarnException, IOException {
+    // Fair scheduler does not support resource change
+    Assume.assumeTrue(schedulerName.equals(CapacityScheduler.class.getName()));
    AMRMClient<ContainerRequest> amClient = null;
    try {
      // start am rm client
@ -987,8 +1027,8 @@ private List<Container> allocateAndStartContainers(
    }
    // send allocation requests
    amClient.allocate(0.1f);
-    // sleep to let NM's heartbeat to RM and trigger allocations
-    sleep(150);
+    // let NM heartbeat to RM and trigger allocations
+    triggerSchedulingWithNMHeartBeat();
    // get allocations
    AllocateResponse allocResponse = amClient.allocate(0.1f);
    List<Container> containers = allocResponse.getAllocatedContainers();
@ -1018,14 +1058,14 @@ private List<Container> allocateAndStartContainers(
          if (status.getState() == ContainerState.RUNNING) {
            break;
          }
-          sleep(100);
+          sleep(10);
        }
      }
    } catch (YarnException e) {
      throw new AssertionError("Exception is not expected: " + e);
    }
-    // sleep to let NM's heartbeat to RM to confirm container launch
-    sleep(200);
+    // trigger NM heartbeats to RM to confirm container launch
+    triggerSchedulingWithNMHeartBeat();
    return containers;
  }

@ -1075,7 +1115,7 @@ private void doContainerResourceChange(
        allocResponse.getUpdatedContainers();
    Assert.assertEquals(1, updatedContainers.size());
    // we should get increase allocation after the next NM's heartbeat to RM
-    sleep(150);
+    triggerSchedulingWithNMHeartBeat();
    // get allocations
    allocResponse = amClient.allocate(0.1f);
    updatedContainers =
@ -1138,8 +1178,8 @@ private void testAllocation(final AMRMClientImpl<ContainerRequest> amClient)
      }
      
      if(allocatedContainerCount < containersRequestedAny) {
-        // sleep to let NM's heartbeat to RM and trigger allocations
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
      }
    }
    
@ -1221,8 +1261,8 @@ private void waitForContainerCompletion(int numIterations,
        }
      }
      if(numIterations > 0) {
-        // sleep to make sure NM's heartbeat
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
      }
    }
    assertEquals(0, amClient.ask.size());
@ -1280,8 +1320,8 @@ private void testAllocRequestId(
      }

      if(allocatedContainers.size() < containersRequestedAny) {
-        // sleep to let NM's heartbeat to RM and trigger allocations
-        sleep(100);
+        // let NM heartbeat to RM and trigger allocations
+        triggerSchedulingWithNMHeartBeat();
      }
    }

@ -1392,12 +1432,7 @@ public void testAMRMClientOnAMRMTokenRollOver() throws YarnException,
      while (System.currentTimeMillis() - startTime <
          rolling_interval_sec * 1000) {
        amClient.allocate(0.1f);
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          // TODO Auto-generated catch block
-          e.printStackTrace();
-        }
+        sleep(1000);
      }
      amClient.allocate(0.1f);

@ -1457,11 +1492,7 @@ public ApplicationMasterProtocol run() {
          }
        }
        amClient.allocate(0.1f);
-        try {
-          Thread.sleep(1000);
-        } catch (InterruptedException e) {
-          // DO NOTHING
-        }
+        sleep(1000);
      }

      try {
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java
@ -353,7 +353,8 @@ public void run() {
   * fair shares, deficits, minimum slot allocations, and amount of used and
   * required resources per job.
   */
-  protected void update() {
+  @VisibleForTesting
+  public void update() {
    try {
      writeLock.lock();