diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java
index 65c15482ee1..7ce4d5d1b6c 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMCommunicator.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
@@ -57,6 +58,8 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import com.sun.research.ws.wadl.Response;
+
/**
* Registers/unregisters to RM and sends heartbeats to RM.
*/
@@ -194,7 +197,15 @@ public abstract class RMCommunicator extends AbstractService
FinishApplicationMasterRequest request =
FinishApplicationMasterRequest.newInstance(finishState,
sb.toString(), historyUrl);
- scheduler.finishApplicationMaster(request);
+ while (true) {
+ FinishApplicationMasterResponse response =
+ scheduler.finishApplicationMaster(request);
+ if (response.getIsUnregistered()) {
+ break;
+ }
+ LOG.info("Waiting for application to be successfully unregistered.");
+ Thread.sleep(rmPollInterval);
+ }
} catch(Exception are) {
LOG.error("Exception while unregistering ", are);
}
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 6a56c6a9cb3..566793f4b8d 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -185,6 +185,9 @@ Release 2.1.1-beta - UNRELEASED
YARN-1189. NMTokenSecretManagerInNM is not being told when applications
have finished (Omkar Vinit Joshi via jlowe)
+ YARN-540. Race condition causing RM to potentially relaunch already
+ unregistered AMs on RM restart (Jian He via bikas)
+
Release 2.1.0-beta - 2013-08-22
INCOMPATIBLE CHANGES
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java
index 4317b675862..8de2c73d88a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/FinishApplicationMasterResponse.java
@@ -26,21 +26,52 @@ import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.util.Records;
/**
- *
The response sent by the ResourceManager
to a
- * ApplicationMaster
on it's completion.
+ *
+ * The response sent by the ResourceManager
to a
+ * ApplicationMaster
on it's completion.
+ *
*
- * Currently, this is empty.
+ *
+ * The response, includes:
+ *
+ * - A flag which indicates that the application has successfully unregistered
+ * with the RM and the application can safely stop.
+ *
+ *
+ * Note: The flag indicates whether the application has successfully
+ * unregistered and is safe to stop. The application may stop after the flag is
+ * true. If the application stops before the flag is true then the RM may retry
+ * the application .
*
* @see ApplicationMasterProtocol#finishApplicationMaster(FinishApplicationMasterRequest)
*/
@Public
@Stable
public abstract class FinishApplicationMasterResponse {
+
@Private
@Unstable
- public static FinishApplicationMasterResponse newInstance() {
+ public static FinishApplicationMasterResponse newInstance(
+ boolean isRemovedFromRMStateStore) {
FinishApplicationMasterResponse response =
Records.newRecord(FinishApplicationMasterResponse.class);
+ response.setIsUnregistered(isRemovedFromRMStateStore);
return response;
}
+
+ /**
+ * Get the flag which indicates that the application has successfully
+ * unregistered with the RM and the application can safely stop.
+ */
+ @Public
+ @Stable
+ public abstract boolean getIsUnregistered();
+
+ /**
+ * Set the flag which indicates that the application has successfully
+ * unregistered with the RM and the application can safely stop.
+ */
+ @Private
+ @Unstable
+ public abstract void setIsUnregistered(boolean isUnregistered);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
index 7b3d0cf77cd..36e1d458efd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto
@@ -52,6 +52,7 @@ message FinishApplicationMasterRequestProto {
}
message FinishApplicationMasterResponseProto {
+ optional bool isUnregistered = 1 [default = false];
}
message AllocateRequestProto {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
index c433b55b6ec..beee423a47c 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
import org.apache.hadoop.yarn.api.records.ContainerId;
@@ -300,11 +301,24 @@ public class AMRMClientImpl extends AMRMClient {
String appMessage, String appTrackingUrl) throws YarnException,
IOException {
Preconditions.checkArgument(appStatus != null,
- "AppStatus should not be null.");
+ "AppStatus should not be null.");
FinishApplicationMasterRequest request =
FinishApplicationMasterRequest.newInstance(appStatus, appMessage,
appTrackingUrl);
- rmClient.finishApplicationMaster(request);
+ try {
+ while (true) {
+ FinishApplicationMasterResponse response =
+ rmClient.finishApplicationMaster(request);
+ if (response.getIsUnregistered()) {
+ break;
+ }
+ LOG.info("Waiting for application to be successfully unregistered.");
+ Thread.sleep(100);
+ }
+ } catch (InterruptedException e) {
+ LOG.info("Interrupted while waiting for application"
+ + " to be removed from RMStateStore");
+ }
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java
index ff57eb42d77..1bad3744dcd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/FinishApplicationMasterResponsePBImpl.java
@@ -22,7 +22,9 @@ package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
+import org.apache.hadoop.yarn.proto.YarnProtos.ResourceRequestProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationMasterResponseProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.FinishApplicationMasterResponseProtoOrBuilder;
import com.google.protobuf.TextFormat;
@@ -67,4 +69,24 @@ public class FinishApplicationMasterResponsePBImpl extends FinishApplicationMast
public String toString() {
return TextFormat.shortDebugString(getProto());
}
+
+ private void maybeInitBuilder() {
+ if (viaProto || builder == null) {
+ builder = FinishApplicationMasterResponseProto.newBuilder(proto);
+ }
+ viaProto = false;
+ }
+
+ @Override
+ public boolean getIsUnregistered() {
+ FinishApplicationMasterResponseProtoOrBuilder p =
+ viaProto ? proto : builder;
+ return p.getIsUnregistered();
+ }
+
+ @Override
+ public void setIsUnregistered(boolean isUnregistered) {
+ maybeInitBuilder();
+ builder.setIsUnregistered(isUnregistered);
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
index a41792db1fd..fd39dad43a1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
@@ -72,6 +72,7 @@ import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptRegistrationEvent;
@@ -303,9 +304,12 @@ public class ApplicationMasterService extends AbstractService implements
.getTrackingUrl(), request.getFinalApplicationStatus(), request
.getDiagnostics()));
- FinishApplicationMasterResponse response = recordFactory
- .newRecordInstance(FinishApplicationMasterResponse.class);
- return response;
+ if (rmContext.getRMApps().get(applicationAttemptId.getApplicationId())
+ .isAppSafeToUnregister()) {
+ return FinishApplicationMasterResponse.newInstance(true);
+ } else {
+ return FinishApplicationMasterResponse.newInstance(false);
+ }
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
index 11248bad48c..cc31643296f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java
@@ -414,8 +414,8 @@ public class ClientRMService extends AbstractService implements
}
if (applicationStates != null && !applicationStates.isEmpty()) {
- if (!applicationStates.contains(RMServerUtils
- .createApplicationState(application.getState()))) {
+ if (!applicationStates.contains(application
+ .createApplicationState())) {
continue;
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
index 6439df1c225..0c38f506b5a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAppManager.java
@@ -186,10 +186,6 @@ public class RMAppManager implements EventHandler,
completedApps.add(applicationId);
writeAuditLog(applicationId);
-
- // application completely done. Remove from state
- RMStateStore store = rmContext.getStateStore();
- store.removeApplication(rmContext.getRMApps().get(applicationId));
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
index 15d306293e9..370040ae740 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
@@ -115,27 +115,4 @@ public class RMServerUtils {
}
}
}
-
- public static YarnApplicationState createApplicationState(RMAppState rmAppState) {
- switch(rmAppState) {
- case NEW:
- return YarnApplicationState.NEW;
- case NEW_SAVING:
- return YarnApplicationState.NEW_SAVING;
- case SUBMITTED:
- return YarnApplicationState.SUBMITTED;
- case ACCEPTED:
- return YarnApplicationState.ACCEPTED;
- case RUNNING:
- return YarnApplicationState.RUNNING;
- case FINISHING:
- case FINISHED:
- return YarnApplicationState.FINISHED;
- case KILLED:
- return YarnApplicationState.KILLED;
- case FAILED:
- return YarnApplicationState.FAILED;
- }
- throw new YarnRuntimeException("Unknown state passed!");
- }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java
index bdf4da38f0b..0852ce81a80 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
@@ -108,7 +109,9 @@ public class MemoryRMStateStore extends RMStateStore {
ApplicationState appState = state.getApplicationState().get(
attemptState.getAttemptId().getApplicationId());
- assert appState != null;
+ if (appState == null) {
+ throw new YarnRuntimeException("Application doesn't exist");
+ }
if (appState.attempts.containsKey(attemptState.getAttemptId())) {
Exception e = new IOException("Attempt: " +
@@ -125,7 +128,9 @@ public class MemoryRMStateStore extends RMStateStore {
throws Exception {
ApplicationId appId = appState.getAppId();
ApplicationState removed = state.appState.remove(appId);
- assert removed != null;
+ if (removed == null) {
+ throw new YarnRuntimeException("Removing non-exsisting application state");
+ }
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
index 179b721bcdf..382ed97d61b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.Ap
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppStoredEvent;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppRemovedEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent;
@@ -482,12 +483,15 @@ public abstract class RMStateStore extends AbstractService {
ApplicationState appState =
((RMStateStoreRemoveAppEvent) event).getAppState();
ApplicationId appId = appState.getAppId();
-
+ Exception removedException = null;
LOG.info("Removing info for app: " + appId);
try {
removeApplicationState(appState);
} catch (Exception e) {
LOG.error("Error removing app: " + appId, e);
+ removedException = e;
+ } finally {
+ notifyDoneRemovingApplcation(appId, removedException);
}
}
break;
@@ -521,7 +525,18 @@ public abstract class RMStateStore extends AbstractService {
rmDispatcher.getEventHandler().handle(
new RMAppAttemptStoredEvent(attemptId, storedException));
}
-
+
+ @SuppressWarnings("unchecked")
+ /**
+ * This is to notify RMApp that this application has been removed from
+ * RMStateStore
+ */
+ private void notifyDoneRemovingApplcation(ApplicationId appId,
+ Exception removedException) {
+ rmDispatcher.getEventHandler().handle(
+ new RMAppRemovedEvent(appId, removedException));
+ }
+
/**
* EventHandler implementation which forward events to the FSRMStateStore
* This hides the EventHandle methods of the store from its public interface
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java
index f1c496a6c62..fadaa3b00e4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
+import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -194,4 +195,20 @@ public interface RMApp extends EventHandler {
* @return the application type.
*/
String getApplicationType();
+
+ /**
+ * Check whether this application is safe to unregister.
+ * An application is deemed to be safe to unregister if it is an unmanaged
+ * AM or its state has been removed from state store.
+ * @return the flag which indicates whether this application is safe to
+ * unregister.
+ */
+ boolean isAppSafeToUnregister();
+
+ /**
+ * Create the external user-facing state of ApplicationMaster from the
+ * current state of the {@link RMApp}.
+ * @return the external user-facing state of ApplicationMaster.
+ */
+ YarnApplicationState createApplicationState();
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java
index d15e12e0014..e7dba63b904 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java
@@ -27,11 +27,14 @@ public enum RMAppEventType {
// Source: RMAppAttempt
APP_REJECTED,
APP_ACCEPTED,
- APP_SAVED,
ATTEMPT_REGISTERED,
- ATTEMPT_FINISHING,
+ ATTEMPT_UNREGISTERED,
ATTEMPT_FINISHED, // Will send the final state
ATTEMPT_FAILED,
ATTEMPT_KILLED,
- NODE_UPDATE
+ NODE_UPDATE,
+
+ // Source: RMStateStore
+ APP_SAVED,
+ APP_REMOVED
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
index 10eca08f2ba..b5922873b67 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java
@@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
+import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
@@ -56,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.Recoverable;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppNodeUpdateEvent.RMAppNodeUpdateType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
@@ -109,6 +111,8 @@ public class RMAppImpl implements RMApp, Recoverable {
private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
private static final AppFinishedTransition FINISHED_TRANSITION =
new AppFinishedTransition();
+ private boolean isAppRemovalRequestSent = false;
+ private RMAppState previousStateAtRemoving;
private static final StateMachineFactory= application.getStartTime()));
}
+ private void assertAppRemoved(RMApp application){
+ verify(store).removeApplication(application);
+ }
+
private static void assertKilled(RMApp application) {
assertTimesAtFinish(application);
assertAppState(RMAppState.KILLED, application);
@@ -366,15 +373,27 @@ public class TestRMAppTransitions {
return application;
}
+ protected RMApp testCreateAppRemoving(
+ ApplicationSubmissionContext submissionContext) throws IOException {
+ RMApp application = testCreateAppRunning(submissionContext);
+ RMAppEvent finishingEvent =
+ new RMAppEvent(application.getApplicationId(),
+ RMAppEventType.ATTEMPT_UNREGISTERED);
+ application.handle(finishingEvent);
+ assertAppState(RMAppState.REMOVING, application);
+ assertAppRemoved(application);
+ return application;
+ }
+
protected RMApp testCreateAppFinishing(
ApplicationSubmissionContext submissionContext) throws IOException {
// unmanaged AMs don't use the FINISHING state
assert submissionContext == null || !submissionContext.getUnmanagedAM();
- RMApp application = testCreateAppRunning(submissionContext);
- // RUNNING => FINISHING event RMAppEventType.ATTEMPT_FINISHING
+ RMApp application = testCreateAppRemoving(submissionContext);
+ // REMOVING => FINISHING event RMAppEventType.APP_REMOVED
RMAppEvent finishingEvent =
new RMAppEvent(application.getApplicationId(),
- RMAppEventType.ATTEMPT_FINISHING);
+ RMAppEventType.APP_REMOVED);
application.handle(finishingEvent);
assertAppState(RMAppState.FINISHING, application);
assertTimesAtFinish(application);
@@ -634,6 +653,30 @@ public class TestRMAppTransitions {
assertFailed(application, ".*Failing the application.*");
}
+ @Test
+ public void testAppRemovingFinished() throws IOException {
+ LOG.info("--- START: testAppRemovingFINISHED ---");
+ RMApp application = testCreateAppRemoving(null);
+ // APP_REMOVING => FINISHED event RMAppEventType.ATTEMPT_FINISHED
+ RMAppEvent finishedEvent = new RMAppFinishedAttemptEvent(
+ application.getApplicationId(), null);
+ application.handle(finishedEvent);
+ rmDispatcher.await();
+ assertAppState(RMAppState.FINISHED, application);
+ }
+
+ @Test
+ public void testAppRemovingKilled() throws IOException {
+ LOG.info("--- START: testAppRemovingKilledD ---");
+ RMApp application = testCreateAppRemoving(null);
+ // APP_REMOVING => KILLED event RMAppEventType.KILL
+ RMAppEvent event =
+ new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
+ application.handle(event);
+ rmDispatcher.await();
+ assertAppState(RMAppState.KILLED, application);
+ }
+
@Test
public void testAppFinishingKill() throws IOException {
LOG.info("--- START: testAppFinishedFinished ---");