From 8d6fb7e9203f09618a796970b51a564dc1ff4185 Mon Sep 17 00:00:00 2001 From: Jian He Date: Mon, 10 Mar 2014 18:03:47 +0000 Subject: [PATCH] Merge r1576023 from trunk. Fixed ClientRMService#forceKillApplication not killing unmanaged application. Contributed by Karthik Kambatla git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1576026 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../ApplicationMasterService.java | 21 ++---- .../resourcemanager/ClientRMService.java | 15 +++-- .../server/resourcemanager/rmapp/RMApp.java | 9 +-- .../resourcemanager/rmapp/RMAppImpl.java | 7 +- .../yarn/server/resourcemanager/MockRM.java | 21 ++++-- .../resourcemanager/TestClientRMService.java | 65 ++++++++++++++++++- .../TestKillApplicationWithRMHA.java | 2 +- .../applicationsmanager/MockAsm.java | 2 +- .../resourcemanager/rmapp/MockRMApp.java | 2 +- 10 files changed, 106 insertions(+), 41 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c33ad10bcd0..fdbc44bfb40 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -418,6 +418,9 @@ Release 2.4.0 - UNRELEASED YARN-1787. Fixed help messages for applicationattempt and container sub-commands in bin/yarn. (Zhijie Shen via vinodkv) + YARN-1793. Fixed ClientRMService#forceKillApplication not killing unmanaged + application. (Karthik Kambatla via jianhe) + Release 2.3.1 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 75a57dd5690..6b2cb7f876a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -351,27 +351,18 @@ public class ApplicationMasterService extends AbstractService implements RMApp rmApp = rmContext.getRMApps().get(applicationAttemptId.getApplicationId()); - if (rmApp.getApplicationSubmissionContext().getUnmanagedAM()) { - // No recovery supported yet for unmanaged AM. Send the unregister event - // and (falsely) acknowledge state-store write immediately. - rmContext.getDispatcher().getEventHandler().handle( - new RMAppAttemptUnregistrationEvent(applicationAttemptId, request - .getTrackingUrl(), request.getFinalApplicationStatus(), request - .getDiagnostics())); + if (rmApp.isAppFinalStateStored()) { return FinishApplicationMasterResponse.newInstance(true); } - // Not an unmanaged-AM. - if (rmApp.isAppSafeToTerminate()) { - return FinishApplicationMasterResponse.newInstance(true); - } else { - // keep sending the unregister event as RM may crash in the meanwhile. - rmContext.getDispatcher().getEventHandler().handle( + rmContext.getDispatcher().getEventHandler().handle( new RMAppAttemptUnregistrationEvent(applicationAttemptId, request .getTrackingUrl(), request.getFinalApplicationStatus(), request .getDiagnostics())); - return FinishApplicationMasterResponse.newInstance(false); - } + + // For UnmanagedAMs, return true so they don't retry + return FinishApplicationMasterResponse.newInstance( + rmApp.getApplicationSubmissionContext().getUnmanagedAM()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index d676f0670ca..7715addaa99 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -396,15 +396,18 @@ public class ClientRMService extends AbstractService implements + ApplicationAccessType.MODIFY_APP.name() + " on " + applicationId)); } - if (application.isAppSafeToTerminate()) { + if (application.isAppFinalStateStored()) { RMAuditLogger.logSuccess(callerUGI.getShortUserName(), - AuditConstants.KILL_APP_REQUEST, "ClientRMService", applicationId); + AuditConstants.KILL_APP_REQUEST, "ClientRMService", applicationId); return KillApplicationResponse.newInstance(true); - } else { - this.rmContext.getDispatcher().getEventHandler() - .handle(new RMAppEvent(applicationId, RMAppEventType.KILL)); - return KillApplicationResponse.newInstance(false); } + + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMAppEvent(applicationId, RMAppEventType.KILL)); + + // For UnmanagedAMs, return true so they don't retry + return KillApplicationResponse.newInstance( + application.getApplicationSubmissionContext().getUnmanagedAM()); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index 1a7e74ef672..391ccf6a5e2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -204,13 +204,10 @@ public interface RMApp extends EventHandler { Set getApplicationTags(); /** - * Check whether this application is safe to terminate. - * An application is deemed to be safe to terminate if it is an unmanaged - * AM or its state has been saved in state store. - * @return the flag which indicates whether this application is safe to - * terminate. + * Check whether this application's state has been saved to the state store. + * @return the flag indicating whether the applications's state is stored. */ - boolean isAppSafeToTerminate(); + boolean isAppFinalStateStored(); /** * Create the external user-facing state of ApplicationMaster from the diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index d84e3d3d599..8c9c52f239b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -1103,14 +1103,11 @@ public class RMAppImpl implements RMApp, Recoverable { } @Override - public boolean isAppSafeToTerminate() { + public boolean isAppFinalStateStored() { RMAppState state = getState(); return state.equals(RMAppState.FINISHING) || state.equals(RMAppState.FINISHED) || state.equals(RMAppState.FAILED) - || state.equals(RMAppState.KILLED) || - // If this is an unmanaged AM, we are safe to unregister since unmanaged - // AM will immediately go to FINISHED state on AM unregistration - getApplicationSubmissionContext().getUnmanagedAM(); + || state.equals(RMAppState.KILLED); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index e37f482e771..50b42db3dd6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -173,17 +173,28 @@ public class MockRM extends ResourceManager { } public RMApp submitApp(int masterMemory) throws Exception { + return submitApp(masterMemory, false); + } + + public RMApp submitApp(int masterMemory, boolean unmanaged) + throws Exception { return submitApp(masterMemory, "", UserGroupInformation.getCurrentUser() - .getShortUserName()); + .getShortUserName(), unmanaged); } // client public RMApp submitApp(int masterMemory, String name, String user) throws Exception { - return submitApp(masterMemory, name, user, null, false, null, - super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, - YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null); + return submitApp(masterMemory, name, user, false); } - + + public RMApp submitApp(int masterMemory, String name, String user, + boolean unmanaged) + throws Exception { + return submitApp(masterMemory, name, user, null, unmanaged, null, + super.getConfig().getInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, + YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS), null); + } + public RMApp submitApp(int masterMemory, String name, String user, Map acls) throws Exception { return submitApp(masterMemory, name, user, acls, false, null, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 7c496815ab2..0bef2e4a757 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -19,6 +19,8 @@ package org.apache.hadoop.yarn.server.resourcemanager; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyBoolean; import static org.mockito.Matchers.anyString; @@ -34,6 +36,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.EnumSet; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -61,6 +64,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetClusterNodesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.MoveApplicationAcrossQueuesRequest; import org.apache.hadoop.yarn.api.protocolrecords.RenewDelegationTokenRequest; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; @@ -218,7 +222,7 @@ public class TestClientRMService { } @Test - public void testForceKillApplication() throws YarnException { + public void testForceKillNonExistingApplication() throws YarnException { RMContext rmContext = mock(RMContext.class); when(rmContext.getRMApps()).thenReturn( new ConcurrentHashMap()); @@ -237,6 +241,58 @@ public class TestClientRMService { "application " + request.getApplicationId()); } } + + @Test + public void testForceKillApplication() throws Exception { + YarnConfiguration conf = new YarnConfiguration(); + MockRM rm = new MockRM(); + rm.init(conf); + rm.start(); + + ClientRMService rmService = rm.getClientRMService(); + GetApplicationsRequest getRequest = GetApplicationsRequest.newInstance( + EnumSet.of(YarnApplicationState.KILLED)); + + RMApp app1 = rm.submitApp(1024); + RMApp app2 = rm.submitApp(1024, true); + + assertEquals("Incorrect number of apps in the RM", 0, + rmService.getApplications(getRequest).getApplicationList().size()); + + KillApplicationRequest killRequest1 = + KillApplicationRequest.newInstance(app1.getApplicationId()); + KillApplicationRequest killRequest2 = + KillApplicationRequest.newInstance(app2.getApplicationId()); + + int killAttemptCount = 0; + for (int i = 0; i < 100; i++) { + KillApplicationResponse killResponse1 = + rmService.forceKillApplication(killRequest1); + killAttemptCount++; + if (killResponse1.getIsKillCompleted()) { + break; + } + Thread.sleep(10); + } + assertTrue("Kill attempt count should be greater than 1 for managed AMs", + killAttemptCount > 1); + assertEquals("Incorrect number of apps in the RM", 1, + rmService.getApplications(getRequest).getApplicationList().size()); + + KillApplicationResponse killResponse2 = + rmService.forceKillApplication(killRequest2); + assertTrue("Killing UnmanagedAM should falsely acknowledge true", + killResponse2.getIsKillCompleted()); + for (int i = 0; i < 100; i++) { + if (2 == + rmService.getApplications(getRequest).getApplicationList().size()) { + break; + } + Thread.sleep(10); + } + assertEquals("Incorrect number of apps in the RM", 2, + rmService.getApplications(getRequest).getApplicationList().size()); + } @Test (expected = ApplicationNotFoundException.class) public void testMoveAbsentApplication() throws YarnException { @@ -629,6 +685,12 @@ public class TestClientRMService { private SubmitApplicationRequest mockSubmitAppRequest(ApplicationId appId, String name, String queue, Set tags) { + return mockSubmitAppRequest(appId, name, queue, tags, false); + } + + private SubmitApplicationRequest mockSubmitAppRequest(ApplicationId appId, + String name, String queue, Set tags, boolean unmanaged) { + ContainerLaunchContext amContainerSpec = mock(ContainerLaunchContext.class); Resource resource = Resources.createResource( @@ -643,6 +705,7 @@ public class TestClientRMService { submissionContext.setResource(resource); submissionContext.setApplicationType(appType); submissionContext.setApplicationTags(tags); + submissionContext.setUnmanagedAM(unmanaged); SubmitApplicationRequest submitRequest = recordFactory.newRecordInstance(SubmitApplicationRequest.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestKillApplicationWithRMHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestKillApplicationWithRMHA.java index c0a623d4be4..13d29ac2993 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestKillApplicationWithRMHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestKillApplicationWithRMHA.java @@ -233,7 +233,7 @@ public class TestKillApplicationWithRMHA extends RMHATestBase{ KillApplicationRequest request) throws YarnException { ApplicationId applicationId = request.getApplicationId(); RMApp application = this.rmContext.getRMApps().get(applicationId); - if (application.isAppSafeToTerminate()) { + if (application.isAppFinalStateStored()) { return KillApplicationResponse.newInstance(true); } else { return KillApplicationResponse.newInstance(false); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index 01e5eea96cc..2cdbf95f4a8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -151,7 +151,7 @@ public abstract class MockAsm extends MockApps { } @Override - public boolean isAppSafeToTerminate() { + public boolean isAppFinalStateStored() { throw new UnsupportedOperationException("Not supported yet."); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index ac3751a191c..b07525db8b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -224,7 +224,7 @@ public class MockRMApp implements RMApp { } @Override - public boolean isAppSafeToTerminate() { + public boolean isAppFinalStateStored() { return true; }