diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 412999f4d0f..9f0e8e1f971 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -542,6 +542,9 @@ Release 2.8.0 - UNRELEASED
     YARN-4405. Support node label store in non-appendable file system. (Wangda
     Tan via jianhe)
 
+    YARN-3946. Update exact reason as to why a submitted app is in ACCEPTED state to
+    app's diagnostic message. (Naganarasimha G R via wangda)
+
   OPTIMIZATIONS
 
     YARN-3339. TestDockerContainerExecutor should pull a single image and not
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 7a2b717cc73..c4c8d2eb33d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -645,7 +645,8 @@ public class RMAppImpl implements RMApp, Recoverable {
         progress = currentAttempt.getProgress();
         logAggregationStatus = this.getLogAggregationStatusForAppReport();
       }
-      diags = this.diagnostics.toString();
+      //if the diagnostics is not already set get it from attempt
+      diags = getDiagnostics().toString();
 
       if (currentAttempt != null &&
           currentAttempt.getAppAttemptState() == RMAppAttemptState.LAUNCHED) {
@@ -750,8 +751,13 @@ public class RMAppImpl implements RMApp, Recoverable {
   @Override
   public StringBuilder getDiagnostics() {
     this.readLock.lock();
-
     try {
+      if (diagnostics.length() == 0 && getCurrentAppAttempt() != null) {
+        String appAttemptDiagnostics = getCurrentAppAttempt().getDiagnostics();
+        if (appAttemptDiagnostics != null) {
+          return new StringBuilder(appAttemptDiagnostics);
+        }
+      }
       return this.diagnostics;
     } finally {
       this.readLock.unlock();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
index 4dd834580b0..91aee402829 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttempt.java
@@ -246,4 +246,10 @@ public interface RMAppAttempt extends EventHandler<RMAppAttemptEvent> {
    * @return the finish time of the application attempt.
    */
   long getFinishTime();
+
+  /**
+   * To capture Launch diagnostics of the app.
+   * @param amLaunchDiagnostics
+   */
+  void updateAMLaunchDiagnostics(String amLaunchDiagnostics);
 }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 88a89b58543..8aefe9f908d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -73,11 +73,11 @@ import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventT
 import org.apache.hadoop.yarn.server.resourcemanager.blacklist.BlacklistManager;
 import org.apache.hadoop.yarn.server.resourcemanager.blacklist.BlacklistUpdates;
 import org.apache.hadoop.yarn.server.resourcemanager.blacklist.DisabledBlacklistManager;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
-import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
 import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationAttemptStateData;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.ApplicationStateData;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
@@ -91,6 +91,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeFinishedContainersPulledByAMEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent;
@@ -187,6 +188,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
   private ResourceRequest amReq = null;
   private BlacklistManager blacklistedNodesForAM = null;
 
+  private String amLaunchDiagnostics;
+
   private static final StateMachineFactory
      40% of label "X" in queue A1
     // Since we have 2 users, 50% of 4Gb will be max for each user. Here user1
     // has already crossed this 2GB limit, hence this app will be pending.
-    rm1.submitApp(GB, "app", user_1, null, "a1", "x");
+    RMApp pendingApp = rm1.submitApp(GB, "app", user_1, null, "a1", "x");
 
     // Verify active applications count per user and also in queue level.
     Assert.assertEquals(3, leafQueue.getNumActiveApplications());
@@ -389,6 +411,14 @@ public class TestApplicationLimitsByPartition {
     Assert.assertEquals(2, leafQueue.getNumActiveApplications(user_1));
     Assert.assertEquals(1, leafQueue.getNumPendingApplications(user_1));
     Assert.assertEquals(1, leafQueue.getNumPendingApplications());
+
+    //verify Diagnostic messages
+    Assert.assertTrue("AM diagnostics not set properly",
+        pendingApp.getDiagnostics().toString()
+            .contains(AMState.INACTIVATED.getDiagnosticMessage()));
+    Assert.assertTrue("AM diagnostics not set properly",
+        pendingApp.getDiagnostics().toString().contains(
+            CSAMContainerLaunchDiagnosticsConstants.USER_AM_RESOURCE_LIMIT_EXCEED));
 
     rm1.close();
   }
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
index 91666df1a4d..dff82ca5b47 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestNodeLabelContainerAllocation.java
@@ -1028,10 +1028,11 @@ public class TestNodeLabelContainerAllocation {
     rm1.getRMContext().setNodeLabelManager(mgr);
     rm1.start();
-    MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); // label = x
+    String nodeIdStr = "h1:1234";
+    MockNM nm1 = rm1.registerNode(nodeIdStr, 8 * GB); // label = x
 
     // launch an app to queue b1 (label = y), AM container should be launched in nm3
-    rm1.submitApp(1 * GB, "app", "user", null, "b1");
+    RMApp app = rm1.submitApp(1 * GB, "app", "user", null, "b1");
 
     CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler();
     RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId());
 
@@ -1040,7 +1041,17 @@ public class TestNodeLabelContainerAllocation {
     for (int i = 0; i < 50; i++) {
       cs.handle(new NodeUpdateSchedulerEvent(rmNode1));
     }
-
+
+    Assert.assertTrue(
+        "Scheduler diagnostics should have reason for not assigning the node",
+        app.getDiagnostics().toString().contains(
+            CSAMContainerLaunchDiagnosticsConstants.SKIP_AM_ALLOCATION_IN_IGNORE_EXCLUSIVE_MODE));
+
+    Assert.assertTrue(
+        "Scheduler diagnostics should have last processed node information",
+        app.getDiagnostics().toString().contains(
+            CSAMContainerLaunchDiagnosticsConstants.LAST_NODE_PROCESSED_MSG
+                + nodeIdStr + " ( Partition : [x]"));
     Assert.assertEquals(0, cs.getSchedulerNode(nm1.getNodeId())
         .getNumContainers());
 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java
index 884de2aed81..49de4787aae 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.any;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
@@ -53,6 +54,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
 import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManager;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.AMState;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode;
 import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
@@ -65,6 +67,7 @@ import org.apache.log4j.LogManager;
 import org.apache.log4j.Logger;
 import org.junit.Before;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 public class TestReservations {
 
@@ -188,6 +191,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(0, 0);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_0 = spy(app_0);
+    Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
 
     a.submitApplicationAttempt(app_0, user_0);
@@ -196,6 +202,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(1, 0);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_1 = spy(app_1);
+    Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     a.submitApplicationAttempt(app_1, user_0);
 
     // Setup some nodes
@@ -348,6 +357,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(0, 0);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_0 = spy(app_0);
+    Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
 
     a.submitApplicationAttempt(app_0, user_0);
@@ -356,6 +368,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(1, 0);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_1 = spy(app_1);
+    Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     a.submitApplicationAttempt(app_1, user_0);
 
     // Setup some nodes
@@ -502,6 +517,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(0, 0);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_0 = spy(app_0);
+    Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
 
     a.submitApplicationAttempt(app_0, user_0);
@@ -510,6 +528,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(1, 0);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_1 = spy(app_1);
+    Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     a.submitApplicationAttempt(app_1, user_0);
 
     // Setup some nodes
@@ -758,6 +779,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(0, 0);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_0 = spy(app_0);
+    Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
 
     a.submitApplicationAttempt(app_0, user_0);
@@ -766,6 +790,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(1, 0);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_1 = spy(app_1);
+    Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     a.submitApplicationAttempt(app_1, user_0);
 
     // Setup some nodes
@@ -927,6 +954,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(0, 0);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_0 = spy(app_0);
+    Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
 
     a.submitApplicationAttempt(app_0, user_0);
@@ -934,6 +964,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(1, 0);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_1 = spy(app_1);
+    Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     a.submitApplicationAttempt(app_1, user_0);
 
     // Setup some nodes
@@ -1068,6 +1101,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(0, 0);
     FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_0 = spy(app_0);
+    Mockito.doNothing().when(app_0).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class));
 
     a.submitApplicationAttempt(app_0, user_0);
@@ -1076,6 +1112,9 @@ public class TestReservations {
         .getMockApplicationAttemptId(1, 0);
     FiCaSchedulerApp app_1 = new FiCaSchedulerApp(appAttemptId_1, user_0, a,
         mock(ActiveUsersManager.class), spyRMContext);
+    app_1 = spy(app_1);
+    Mockito.doNothing().when(app_1).updateAMContainerDiagnostics(any(AMState.class),
+        any(String.class));
     a.submitApplicationAttempt(app_1, user_0);
 
     // Setup some nodes
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java
index d7e80e86590..d0a8c27e528 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesApps.java
@@ -1387,8 +1387,8 @@ public class TestRMWebServicesApps extends JerseyTestBase {
     assertEquals("progress doesn't match", 0, progress, 0.0);
     WebServicesTestUtils.checkStringMatch("trackingUI", "UNASSIGNED",
         trackingUI);
-    WebServicesTestUtils.checkStringMatch("diagnostics", app.getDiagnostics()
-        .toString(), diagnostics);
+    WebServicesTestUtils.checkStringEqual("diagnostics",
+        app.getDiagnostics().toString(), diagnostics);
     assertEquals("clusterId doesn't match",
         ResourceManager.getClusterTimeStamp(), clusterId);
     assertEquals("startedTime doesn't match", app.getStartTime(), startedTime);
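
Note: the behavioral core of this patch is the read-side fallback in RMAppImpl#getDiagnostics(): while the app-level diagnostics buffer is still empty (the app sits in ACCEPTED), the report is served from the current attempt's AM launch diagnostics, which the scheduler keeps current via updateAMLaunchDiagnostics()/updateAMContainerDiagnostics(). The following minimal, self-contained sketch illustrates that pattern in isolation; App, Attempt, and DiagnosticsFallbackSketch are hypothetical stand-ins for illustration, not the real RMAppImpl/RMAppAttemptImpl classes.

import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Illustrative stand-ins for RMAppImpl / RMAppAttemptImpl, showing the
// "fall back to the attempt's diagnostics" pattern from the patch above.
public class DiagnosticsFallbackSketch {

  static class Attempt {
    private volatile String amLaunchDiagnostics;

    // Mirrors RMAppAttempt#updateAMLaunchDiagnostics(String): the scheduler
    // records why the AM container has not been launched yet.
    void updateAMLaunchDiagnostics(String diagnostics) {
      this.amLaunchDiagnostics = diagnostics;
    }

    String getDiagnostics() {
      return amLaunchDiagnostics;
    }
  }

  static class App {
    private final StringBuilder diagnostics = new StringBuilder();
    private final ReadWriteLock lock = new ReentrantReadWriteLock();
    private volatile Attempt currentAttempt;

    void setCurrentAttempt(Attempt attempt) {
      this.currentAttempt = attempt;
    }

    // App-level diagnostics win; otherwise expose the attempt's launch
    // diagnostics so an ACCEPTED app can report why its AM is not running.
    StringBuilder getDiagnostics() {
      lock.readLock().lock();
      try {
        if (diagnostics.length() == 0 && currentAttempt != null) {
          String attemptDiags = currentAttempt.getDiagnostics();
          if (attemptDiags != null) {
            // Defensive copy: callers must not mutate attempt-owned state.
            return new StringBuilder(attemptDiags);
          }
        }
        return diagnostics;
      } finally {
        lock.readLock().unlock();
      }
    }
  }

  public static void main(String[] args) {
    App app = new App();
    Attempt attempt = new Attempt();
    app.setCurrentAttempt(attempt);
    attempt.updateAMLaunchDiagnostics(
        "[Application is activated, waiting for AM container to be allocated]");
    // Prints the attempt-level reason because app-level diagnostics are empty.
    System.out.println(app.getDiagnostics());
  }
}

Returning a fresh StringBuilder for the attempt-sourced path, as the patch does, keeps the app's own buffer authoritative: once the app writes real diagnostics (for example on failure), those take precedence automatically.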