From b6be5f869a221810cbc95c833336bea6b9d4d087 Mon Sep 17 00:00:00 2001 From: Jonathan Hung Date: Tue, 27 Aug 2019 15:35:17 -0700 Subject: [PATCH] YARN-9438. launchTime not written to state store for running applications (cherry picked from commit 9568656cd21d9c02168e18ce35c6726077bbf3a1) (cherry picked from commit 0c498de6e87c6bdc959afa31deb03d0907e0e1a1) (cherry picked from commit f73842780e3e5daff2db4d30771a194f3bc47d6d) --- .../server/resourcemanager/RMAppManager.java | 2 ++ .../resourcemanager/rmapp/RMAppImpl.java | 6 +++++ .../scheduler/capacity/CapacityScheduler.java | 1 + .../server/resourcemanager/TestRMRestart.java | 19 +++++++++------- .../rmapp/TestRMAppTransitions.java | 22 +++++++++++++++++++ 5 files changed, 42 insertions(+), 8 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java index d6a4d2f782d..b9da29a5c64 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java @@ -615,6 +615,7 @@ public class RMAppManager implements EventHandler, app.getStartTime(), app.getApplicationSubmissionContext(), app.getUser(), app.getCallerContext()); appState.setApplicationTimeouts(currentExpireTimeouts); + appState.setLaunchTime(app.getLaunchTime()); // update to state store. Though it synchronous call, update via future to // know any exception has been set. It is required because in non-HA mode, @@ -740,6 +741,7 @@ public class RMAppManager implements EventHandler, app.getApplicationSubmissionContext(), app.getUser(), app.getCallerContext()); appState.setApplicationTimeouts(app.getApplicationTimeouts()); + appState.setLaunchTime(app.getLaunchTime()); rmContext.getStateStore().updateApplicationStateSynchronously(appState, false, future); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 63acaeab100..b8b4c9d9dd5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -1050,6 +1050,12 @@ public class RMAppImpl implements RMApp, Recoverable { app.getApplicationId()+", attemptId: "+ app.getCurrentAppAttempt().getAppAttemptId()+ "launchTime: "+event.getTimestamp()); + ApplicationStateData appState = ApplicationStateData.newInstance( + app.submitTime, app.startTime, app.submissionContext, app.user, + app.callerContext); + appState.setApplicationTimeouts(app.getApplicationTimeouts()); + appState.setLaunchTime(event.getTimestamp()); + app.rmContext.getStateStore().updateApplicationState(appState); app.launchTime = event.getTimestamp(); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index e70e7cc30ec..84676873386 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -2315,6 +2315,7 @@ public class CapacityScheduler extends rmApp.getApplicationSubmissionContext(), rmApp.getUser(), rmApp.getCallerContext()); appState.setApplicationTimeouts(rmApp.getApplicationTimeouts()); + appState.setLaunchTime(rmApp.getLaunchTime()); rmContext.getStateStore().updateApplicationStateSynchronously(appState, false, future); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index bb2c448d35a..2520e6e2cde 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -686,13 +686,14 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { @Override public void updateApplicationStateInternal(ApplicationId appId, ApplicationStateData appStateData) throws Exception { - if (count == 0) { - // do nothing; simulate app final state is not saved. + if (count == 1) { + // Application state is updated on attempt launch. + // After that, do nothing; simulate app final state is not saved. LOG.info(appId + " final state is not saved."); - count++; } else { super.updateApplicationStateInternal(appId, appStateData); } + count++; } }; memStore.init(conf); @@ -706,7 +707,6 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { MockNM nm1 = rm1.registerNode("127.0.0.1:1234", 15120); RMApp app0 = rm1.submitApp(200); MockAM am0 = MockRM.launchAndRegisterAM(app0, rm1, nm1); - FinishApplicationMasterRequest req = FinishApplicationMasterRequest.newInstance( FinalApplicationStatus.SUCCEEDED, "", ""); @@ -1729,8 +1729,11 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED); rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED); - Assert.assertEquals(1, ((TestMemoryRMStateStore) memStore).updateAttempt); - Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateApp); + // count = 1 on storing RMApp launchTime + // count = 2 on storing attempt state on kill + // count = 3 on storing app state on kill + Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateAttempt); + Assert.assertEquals(3, ((TestMemoryRMStateStore) memStore).updateApp); } // Test Application that fails on submission is saved in state store. @@ -2479,8 +2482,6 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { // create an app and finish the app. RMApp app0 = rm1.submitApp(200); - ApplicationStateData app0State = memStore.getState().getApplicationState() - .get(app0.getApplicationId()); MockAM am0 = launchAndFailAM(app0, rm1, nm1); MockAM am1 = launchAndFailAM(app0, rm1, nm1); @@ -2489,6 +2490,8 @@ public class TestRMRestart extends ParameterizedSchedulerTestBase { // am1 is missed from MemoryRMStateStore memStore.removeApplicationAttemptInternal(am1.getApplicationAttemptId()); + ApplicationStateData app0State = memStore.getState().getApplicationState() + .get(app0.getApplicationId()); ApplicationAttemptStateData am2State = app0State.getAttempt( am2.getApplicationAttemptId()); // am2's state is not consistent: MemoryRMStateStore just saved its initial diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 70887e0f4bf..5a6c16f448b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -468,6 +468,13 @@ public class TestRMAppTransitions { any(ApplicationStateData.class)); } + private void assertAppStateLaunchTimeSaved(long expectedLaunchTime) { + ArgumentCaptor state = + ArgumentCaptor.forClass(ApplicationStateData.class); + verify(store, times(1)).updateApplicationState(state.capture()); + assertEquals(expectedLaunchTime, state.getValue().getLaunchTime()); + } + private void assertKilled(RMApp application) { assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); @@ -898,6 +905,21 @@ public class TestRMAppTransitions { verifyRMAppFieldsForFinalTransitions(application); } + @Test + public void testAppAcceptedAccepted() throws IOException { + LOG.info("--- START: testAppAcceptedAccepted ---"); + + RMApp application = testCreateAppAccepted(null); + // ACCEPTED => ACCEPTED event RMAppEventType.ATTEMPT_LAUNCHED + RMAppEvent appAttemptLaunched = + new RMAppEvent(application.getApplicationId(), + RMAppEventType.ATTEMPT_LAUNCHED, 1234L); + application.handle(appAttemptLaunched); + rmDispatcher.await(); + assertAppState(RMAppState.ACCEPTED, application); + assertAppStateLaunchTimeSaved(1234L); + } + @Test public void testAppAcceptedAttemptKilled() throws IOException, InterruptedException {