YARN-1446. Changed client API to retry killing application till RM acknowledges so as to account for RM crashes/failover. Contributed by Jian He.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551444 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a6754bbb81
commit
b774d7b3de
|
@ -178,6 +178,9 @@ Release 2.4.0 - UNRELEASED
|
||||||
YARN-1435. Modified Distributed Shell to accept either the command or the
|
YARN-1435. Modified Distributed Shell to accept either the command or the
|
||||||
custom script. (Xuan Gong via zjshen)
|
custom script. (Xuan Gong via zjshen)
|
||||||
|
|
||||||
|
YARN-1446. Changed client API to retry killing application till RM
|
||||||
|
acknowledges so as to account for RM crashes/failover. (Jian He via vinodkv)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -26,10 +26,21 @@ import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
|
||||||
import org.apache.hadoop.yarn.util.Records;
|
import org.apache.hadoop.yarn.util.Records;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>The response sent by the <code>ResourceManager</code> to the client
|
* <p>
|
||||||
* aborting a submitted application.</p>
|
* The response sent by the <code>ResourceManager</code> to the client aborting
|
||||||
*
|
* a submitted application.
|
||||||
* <p>Currently it's empty.</p>
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The response, includes:
|
||||||
|
* <ul>
|
||||||
|
* <li>A flag which indicates that the process of killing the application is
|
||||||
|
* completed or not.</li>
|
||||||
|
* </ul>
|
||||||
|
* Note: user is recommended to wait until this flag becomes true, otherwise if
|
||||||
|
* the <code>ResourceManager</code> crashes before the process of killing the
|
||||||
|
* application is completed, the <code>ResourceManager</code> may retry this
|
||||||
|
* application on recovery.
|
||||||
|
* </p>
|
||||||
*
|
*
|
||||||
* @see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest)
|
* @see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest)
|
||||||
*/
|
*/
|
||||||
|
@ -38,9 +49,24 @@ import org.apache.hadoop.yarn.util.Records;
|
||||||
public abstract class KillApplicationResponse {
|
public abstract class KillApplicationResponse {
|
||||||
@Private
|
@Private
|
||||||
@Unstable
|
@Unstable
|
||||||
public static KillApplicationResponse newInstance() {
|
public static KillApplicationResponse newInstance(boolean isKillCompleted) {
|
||||||
KillApplicationResponse response =
|
KillApplicationResponse response =
|
||||||
Records.newRecord(KillApplicationResponse.class);
|
Records.newRecord(KillApplicationResponse.class);
|
||||||
|
response.setIsKillCompleted(isKillCompleted);
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the flag which indicates that the process of killing application is completed or not.
|
||||||
|
*/
|
||||||
|
@Public
|
||||||
|
@Stable
|
||||||
|
public abstract boolean getIsKillCompleted();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the flag which indicates that the process of killing application is completed or not.
|
||||||
|
*/
|
||||||
|
@Private
|
||||||
|
@Unstable
|
||||||
|
public abstract void setIsKillCompleted(boolean isKillCompleted);
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,6 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
import org.apache.hadoop.classification.InterfaceAudience.Public;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Evolving;
|
import org.apache.hadoop.classification.InterfaceStability.Evolving;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||||
import org.apache.hadoop.http.HttpConfig;
|
import org.apache.hadoop.http.HttpConfig;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
|
@ -882,14 +881,22 @@ public class YarnConfiguration extends Configuration {
|
||||||
////////////////////////////////
|
////////////////////////////////
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Use YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS instead.
|
||||||
* The interval of the yarn client's querying application state after
|
* The interval of the yarn client's querying application state after
|
||||||
* application submission. The unit is millisecond.
|
* application submission. The unit is millisecond.
|
||||||
*/
|
*/
|
||||||
|
@Deprecated
|
||||||
public static final String YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS =
|
public static final String YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS =
|
||||||
YARN_PREFIX + "client.app-submission.poll-interval";
|
YARN_PREFIX + "client.app-submission.poll-interval";
|
||||||
public static final long DEFAULT_YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS =
|
|
||||||
1000;
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The interval that the yarn client library uses to poll the completion
|
||||||
|
* status of the asynchronous API of application client protocol.
|
||||||
|
*/
|
||||||
|
public static final String YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS =
|
||||||
|
YARN_PREFIX + "client.application-client-protocol.poll-interval-ms";
|
||||||
|
public static final long DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS =
|
||||||
|
200;
|
||||||
/**
|
/**
|
||||||
* Max number of threads in NMClientAsync to process container management
|
* Max number of threads in NMClientAsync to process container management
|
||||||
* events
|
* events
|
||||||
|
|
|
@ -116,6 +116,7 @@ message KillApplicationRequestProto {
|
||||||
}
|
}
|
||||||
|
|
||||||
message KillApplicationResponseProto {
|
message KillApplicationResponseProto {
|
||||||
|
optional bool is_kill_completed = 1 [default = false];
|
||||||
}
|
}
|
||||||
|
|
||||||
message GetClusterMetricsRequestProto {
|
message GetClusterMetricsRequestProto {
|
||||||
|
|
|
@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||||
|
@ -79,7 +80,8 @@ public class YarnClientImpl extends YarnClient {
|
||||||
|
|
||||||
protected ApplicationClientProtocol rmClient;
|
protected ApplicationClientProtocol rmClient;
|
||||||
protected InetSocketAddress rmAddress;
|
protected InetSocketAddress rmAddress;
|
||||||
protected long statePollIntervalMillis;
|
protected long submitPollIntervalMillis;
|
||||||
|
private long asyncApiPollIntervalMillis;
|
||||||
|
|
||||||
private static final String ROOT = "root";
|
private static final String ROOT = "root";
|
||||||
|
|
||||||
|
@ -92,12 +94,20 @@ public class YarnClientImpl extends YarnClient {
|
||||||
YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);
|
YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
@Override
|
@Override
|
||||||
protected void serviceInit(Configuration conf) throws Exception {
|
protected void serviceInit(Configuration conf) throws Exception {
|
||||||
this.rmAddress = getRmAddress(conf);
|
this.rmAddress = getRmAddress(conf);
|
||||||
statePollIntervalMillis = conf.getLong(
|
asyncApiPollIntervalMillis =
|
||||||
|
conf.getLong(YarnConfiguration.YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS,
|
||||||
|
YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS);
|
||||||
|
submitPollIntervalMillis = asyncApiPollIntervalMillis;
|
||||||
|
if (conf.get(YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS)
|
||||||
|
!= null) {
|
||||||
|
submitPollIntervalMillis = conf.getLong(
|
||||||
YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS,
|
YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS,
|
||||||
YarnConfiguration.DEFAULT_YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS);
|
YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS);
|
||||||
|
}
|
||||||
super.serviceInit(conf);
|
super.serviceInit(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -165,7 +175,7 @@ public class YarnClientImpl extends YarnClient {
|
||||||
" is still in " + state);
|
" is still in " + state);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
Thread.sleep(statePollIntervalMillis);
|
Thread.sleep(submitPollIntervalMillis);
|
||||||
} catch (InterruptedException ie) {
|
} catch (InterruptedException ie) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -179,11 +189,29 @@ public class YarnClientImpl extends YarnClient {
|
||||||
@Override
|
@Override
|
||||||
public void killApplication(ApplicationId applicationId)
|
public void killApplication(ApplicationId applicationId)
|
||||||
throws YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
LOG.info("Killing application " + applicationId);
|
|
||||||
KillApplicationRequest request =
|
KillApplicationRequest request =
|
||||||
Records.newRecord(KillApplicationRequest.class);
|
Records.newRecord(KillApplicationRequest.class);
|
||||||
request.setApplicationId(applicationId);
|
request.setApplicationId(applicationId);
|
||||||
rmClient.forceKillApplication(request);
|
|
||||||
|
try {
|
||||||
|
int pollCount = 0;
|
||||||
|
while (true) {
|
||||||
|
KillApplicationResponse response =
|
||||||
|
rmClient.forceKillApplication(request);
|
||||||
|
if (response.getIsKillCompleted()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (++pollCount % 10 == 0) {
|
||||||
|
LOG.info("Watiting for application " + applicationId
|
||||||
|
+ " to be killed.");
|
||||||
|
}
|
||||||
|
Thread.sleep(asyncApiPollIntervalMillis);
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.error("Interrupted while waiting for application " + applicationId
|
||||||
|
+ " to be killed.");
|
||||||
|
}
|
||||||
|
LOG.info("Killed application " + applicationId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -42,6 +42,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -89,6 +91,7 @@ public class TestYarnClient {
|
||||||
rm.stop();
|
rm.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
@Test (timeout = 30000)
|
@Test (timeout = 30000)
|
||||||
public void testSubmitApplication() {
|
public void testSubmitApplication() {
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
|
@ -128,6 +131,23 @@ public class TestYarnClient {
|
||||||
client.stop();
|
client.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKillApplication() throws Exception {
|
||||||
|
MockRM rm = new MockRM();
|
||||||
|
rm.start();
|
||||||
|
RMApp app = rm.submitApp(2000);
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
@SuppressWarnings("resource")
|
||||||
|
final YarnClient client = new MockYarnClient();
|
||||||
|
client.init(conf);
|
||||||
|
client.start();
|
||||||
|
|
||||||
|
client.killApplication(app.getApplicationId());
|
||||||
|
verify(((MockYarnClient) client).getRMClient(), times(2))
|
||||||
|
.forceKillApplication(any(KillApplicationRequest.class));
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout = 30000)
|
@Test(timeout = 30000)
|
||||||
public void testApplicationType() throws Exception {
|
public void testApplicationType() throws Exception {
|
||||||
Logger rootLogger = LogManager.getRootLogger();
|
Logger rootLogger = LogManager.getRootLogger();
|
||||||
|
@ -234,6 +254,11 @@ public class TestYarnClient {
|
||||||
GetApplicationReportRequest.class))).thenReturn(mockResponse);
|
GetApplicationReportRequest.class))).thenReturn(mockResponse);
|
||||||
when(rmClient.getApplications(any(GetApplicationsRequest.class)))
|
when(rmClient.getApplications(any(GetApplicationsRequest.class)))
|
||||||
.thenReturn(mockAppResponse);
|
.thenReturn(mockAppResponse);
|
||||||
|
// return false for 1st kill request, and true for the 2nd.
|
||||||
|
when(rmClient.forceKillApplication(any(
|
||||||
|
KillApplicationRequest.class)))
|
||||||
|
.thenReturn(KillApplicationResponse.newInstance(false)).thenReturn(
|
||||||
|
KillApplicationResponse.newInstance(true));
|
||||||
} catch (YarnException e) {
|
} catch (YarnException e) {
|
||||||
Assert.fail("Exception is not expected.");
|
Assert.fail("Exception is not expected.");
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -242,6 +267,10 @@ public class TestYarnClient {
|
||||||
when(mockResponse.getApplicationReport()).thenReturn(mockReport);
|
when(mockResponse.getApplicationReport()).thenReturn(mockReport);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public ApplicationClientProtocol getRMClient() {
|
||||||
|
return rmClient;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ApplicationReport> getApplications(
|
public List<ApplicationReport> getApplications(
|
||||||
Set<String> applicationTypes, EnumSet<YarnApplicationState> applicationStates)
|
Set<String> applicationTypes, EnumSet<YarnApplicationState> applicationStates)
|
||||||
|
|
|
@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProto;
|
import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProto;
|
||||||
|
import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProtoOrBuilder;
|
||||||
|
|
||||||
import com.google.protobuf.TextFormat;
|
import com.google.protobuf.TextFormat;
|
||||||
|
|
||||||
|
@ -67,4 +68,24 @@ public class KillApplicationResponsePBImpl extends KillApplicationResponse {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return TextFormat.shortDebugString(getProto());
|
return TextFormat.shortDebugString(getProto());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void maybeInitBuilder() {
|
||||||
|
if (viaProto || builder == null) {
|
||||||
|
builder = KillApplicationResponseProto.newBuilder(proto);
|
||||||
|
}
|
||||||
|
viaProto = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean getIsKillCompleted() {
|
||||||
|
KillApplicationResponseProtoOrBuilder p =
|
||||||
|
viaProto ? proto : builder;
|
||||||
|
return p.getIsKillCompleted();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setIsKillCompleted(boolean isKillCompleted) {
|
||||||
|
maybeInitBuilder();
|
||||||
|
builder.setIsKillCompleted(isKillCompleted);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -945,10 +945,10 @@
|
||||||
|
|
||||||
<!-- Other configuration -->
|
<!-- Other configuration -->
|
||||||
<property>
|
<property>
|
||||||
<description>The interval of the yarn client's querying application state
|
<description>The interval that the yarn client library uses to poll the
|
||||||
after application submission. The unit is millisecond.</description>
|
completion status of the asynchronous API of application client protocol.
|
||||||
<name>yarn.client.app-submission.poll-interval</name>
|
</description>
|
||||||
<value>1000</value>
|
<name>yarn.client.application-client-protocol.poll-interval-ms</name>
|
||||||
|
<value>200</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
</configuration>
|
</configuration>
|
||||||
|
|
|
@ -292,15 +292,15 @@ public class ApplicationMasterService extends AbstractService implements
|
||||||
|
|
||||||
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
|
this.amLivelinessMonitor.receivedPing(applicationAttemptId);
|
||||||
|
|
||||||
rmContext.getDispatcher().getEventHandler().handle(
|
if (rmContext.getRMApps().get(applicationAttemptId.getApplicationId())
|
||||||
|
.isAppSafeToTerminate()) {
|
||||||
|
return FinishApplicationMasterResponse.newInstance(true);
|
||||||
|
} else {
|
||||||
|
// keep sending the unregister event as RM may crash in the meanwhile.
|
||||||
|
rmContext.getDispatcher().getEventHandler().handle(
|
||||||
new RMAppAttemptUnregistrationEvent(applicationAttemptId, request
|
new RMAppAttemptUnregistrationEvent(applicationAttemptId, request
|
||||||
.getTrackingUrl(), request.getFinalApplicationStatus(), request
|
.getTrackingUrl(), request.getFinalApplicationStatus(), request
|
||||||
.getDiagnostics()));
|
.getDiagnostics()));
|
||||||
|
|
||||||
if (rmContext.getRMApps().get(applicationAttemptId.getApplicationId())
|
|
||||||
.isAppSafeToUnregister()) {
|
|
||||||
return FinishApplicationMasterResponse.newInstance(true);
|
|
||||||
} else {
|
|
||||||
return FinishApplicationMasterResponse.newInstance(false);
|
return FinishApplicationMasterResponse.newInstance(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -380,14 +380,15 @@ public class ClientRMService extends AbstractService implements
|
||||||
+ ApplicationAccessType.MODIFY_APP.name() + " on " + applicationId));
|
+ ApplicationAccessType.MODIFY_APP.name() + " on " + applicationId));
|
||||||
}
|
}
|
||||||
|
|
||||||
this.rmContext.getDispatcher().getEventHandler().handle(
|
if (application.isAppSafeToTerminate()) {
|
||||||
new RMAppEvent(applicationId, RMAppEventType.KILL));
|
RMAuditLogger.logSuccess(callerUGI.getShortUserName(),
|
||||||
|
AuditConstants.KILL_APP_REQUEST, "ClientRMService", applicationId);
|
||||||
RMAuditLogger.logSuccess(callerUGI.getShortUserName(),
|
return KillApplicationResponse.newInstance(true);
|
||||||
AuditConstants.KILL_APP_REQUEST, "ClientRMService" , applicationId);
|
} else {
|
||||||
KillApplicationResponse response = recordFactory
|
this.rmContext.getDispatcher().getEventHandler()
|
||||||
.newRecordInstance(KillApplicationResponse.class);
|
.handle(new RMAppEvent(applicationId, RMAppEventType.KILL));
|
||||||
return response;
|
return KillApplicationResponse.newInstance(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -197,13 +197,13 @@ public interface RMApp extends EventHandler<RMAppEvent> {
|
||||||
String getApplicationType();
|
String getApplicationType();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check whether this application is safe to unregister.
|
* Check whether this application is safe to terminate.
|
||||||
* An application is deemed to be safe to unregister if it is an unmanaged
|
* An application is deemed to be safe to terminate if it is an unmanaged
|
||||||
* AM or its state has been removed from state store.
|
* AM or its state has been saved in state store.
|
||||||
* @return the flag which indicates whether this application is safe to
|
* @return the flag which indicates whether this application is safe to
|
||||||
* unregister.
|
* terminate.
|
||||||
*/
|
*/
|
||||||
boolean isAppSafeToUnregister();
|
boolean isAppSafeToTerminate();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create the external user-facing state of ApplicationMaster from the
|
* Create the external user-facing state of ApplicationMaster from the
|
||||||
|
|
|
@ -37,5 +37,4 @@ public enum RMAppEventType {
|
||||||
// Source: RMStateStore
|
// Source: RMStateStore
|
||||||
APP_NEW_SAVED,
|
APP_NEW_SAVED,
|
||||||
APP_UPDATE_SAVED,
|
APP_UPDATE_SAVED,
|
||||||
APP_REMOVED
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,10 +110,14 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
|
private static final FinalTransition FINAL_TRANSITION = new FinalTransition();
|
||||||
private static final AppFinishedTransition FINISHED_TRANSITION =
|
private static final AppFinishedTransition FINISHED_TRANSITION =
|
||||||
new AppFinishedTransition();
|
new AppFinishedTransition();
|
||||||
|
|
||||||
|
// These states stored are only valid when app is at killing or final_saving.
|
||||||
|
private RMAppState stateBeforeKilling;
|
||||||
private RMAppState stateBeforeFinalSaving;
|
private RMAppState stateBeforeFinalSaving;
|
||||||
private RMAppEvent eventCausingFinalSaving;
|
private RMAppEvent eventCausingFinalSaving;
|
||||||
private RMAppState targetedFinalState;
|
private RMAppState targetedFinalState;
|
||||||
private RMAppState recoveredFinalState;
|
private RMAppState recoveredFinalState;
|
||||||
|
|
||||||
Object transitionTodo;
|
Object transitionTodo;
|
||||||
|
|
||||||
private static final StateMachineFactory<RMAppImpl,
|
private static final StateMachineFactory<RMAppImpl,
|
||||||
|
@ -166,10 +170,8 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
new AppRejectedTransition(), RMAppState.FAILED))
|
new AppRejectedTransition(), RMAppState.FAILED))
|
||||||
.addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED,
|
.addTransition(RMAppState.SUBMITTED, RMAppState.ACCEPTED,
|
||||||
RMAppEventType.APP_ACCEPTED)
|
RMAppEventType.APP_ACCEPTED)
|
||||||
.addTransition(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING,
|
.addTransition(RMAppState.SUBMITTED, RMAppState.KILLING,
|
||||||
RMAppEventType.KILL,
|
RMAppEventType.KILL,new KillAttemptTransition())
|
||||||
new FinalSavingTransition(
|
|
||||||
new KillAppAndAttemptTransition(), RMAppState.KILLED))
|
|
||||||
|
|
||||||
// Transitions from ACCEPTED state
|
// Transitions from ACCEPTED state
|
||||||
.addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED,
|
.addTransition(RMAppState.ACCEPTED, RMAppState.ACCEPTED,
|
||||||
|
@ -180,10 +182,8 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING),
|
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING),
|
||||||
RMAppEventType.ATTEMPT_FAILED,
|
RMAppEventType.ATTEMPT_FAILED,
|
||||||
new AttemptFailedTransition(RMAppState.SUBMITTED))
|
new AttemptFailedTransition(RMAppState.SUBMITTED))
|
||||||
.addTransition(RMAppState.ACCEPTED, RMAppState.FINAL_SAVING,
|
.addTransition(RMAppState.ACCEPTED, RMAppState.KILLING,
|
||||||
RMAppEventType.KILL,
|
RMAppEventType.KILL,new KillAttemptTransition())
|
||||||
new FinalSavingTransition(
|
|
||||||
new KillAppAndAttemptTransition(), RMAppState.KILLED))
|
|
||||||
|
|
||||||
// Transitions from RUNNING state
|
// Transitions from RUNNING state
|
||||||
.addTransition(RMAppState.RUNNING, RMAppState.RUNNING,
|
.addTransition(RMAppState.RUNNING, RMAppState.RUNNING,
|
||||||
|
@ -200,10 +200,8 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING),
|
EnumSet.of(RMAppState.SUBMITTED, RMAppState.FINAL_SAVING),
|
||||||
RMAppEventType.ATTEMPT_FAILED,
|
RMAppEventType.ATTEMPT_FAILED,
|
||||||
new AttemptFailedTransition(RMAppState.SUBMITTED))
|
new AttemptFailedTransition(RMAppState.SUBMITTED))
|
||||||
.addTransition(RMAppState.RUNNING, RMAppState.FINAL_SAVING,
|
.addTransition(RMAppState.RUNNING, RMAppState.KILLING,
|
||||||
RMAppEventType.KILL,
|
RMAppEventType.KILL, new KillAttemptTransition())
|
||||||
new FinalSavingTransition(
|
|
||||||
new KillAppAndAttemptTransition(), RMAppState.KILLED))
|
|
||||||
|
|
||||||
// Transitions from FINAL_SAVING state
|
// Transitions from FINAL_SAVING state
|
||||||
.addTransition(RMAppState.FINAL_SAVING,
|
.addTransition(RMAppState.FINAL_SAVING,
|
||||||
|
@ -221,11 +219,27 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
// Transitions from FINISHING state
|
// Transitions from FINISHING state
|
||||||
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
||||||
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
|
RMAppEventType.ATTEMPT_FINISHED, FINISHED_TRANSITION)
|
||||||
.addTransition(RMAppState.FINISHING, RMAppState.FINISHED,
|
|
||||||
RMAppEventType.KILL, new KillAppAndAttemptTransition())
|
|
||||||
// ignorable transitions
|
// ignorable transitions
|
||||||
.addTransition(RMAppState.FINISHING, RMAppState.FINISHING,
|
.addTransition(RMAppState.FINISHING, RMAppState.FINISHING,
|
||||||
EnumSet.of(RMAppEventType.NODE_UPDATE))
|
EnumSet.of(RMAppEventType.NODE_UPDATE,
|
||||||
|
// ignore Kill as we have already saved the final Finished state in
|
||||||
|
// state store.
|
||||||
|
RMAppEventType.KILL))
|
||||||
|
|
||||||
|
// Transitions from KILLING state
|
||||||
|
.addTransition(RMAppState.KILLING, RMAppState.FINAL_SAVING,
|
||||||
|
RMAppEventType.ATTEMPT_KILLED,
|
||||||
|
new FinalSavingTransition(
|
||||||
|
new AppKilledTransition(), RMAppState.KILLED))
|
||||||
|
.addTransition(RMAppState.KILLING, RMAppState.KILLING,
|
||||||
|
EnumSet.of(
|
||||||
|
RMAppEventType.NODE_UPDATE,
|
||||||
|
RMAppEventType.ATTEMPT_REGISTERED,
|
||||||
|
RMAppEventType.ATTEMPT_UNREGISTERED,
|
||||||
|
RMAppEventType.ATTEMPT_FINISHED,
|
||||||
|
RMAppEventType.ATTEMPT_FAILED,
|
||||||
|
RMAppEventType.APP_UPDATE_SAVED,
|
||||||
|
RMAppEventType.KILL))
|
||||||
|
|
||||||
// Transitions from FINISHED state
|
// Transitions from FINISHED state
|
||||||
// ignorable transitions
|
// ignorable transitions
|
||||||
|
@ -249,7 +263,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
EnumSet.of(RMAppEventType.APP_ACCEPTED,
|
EnumSet.of(RMAppEventType.APP_ACCEPTED,
|
||||||
RMAppEventType.APP_REJECTED, RMAppEventType.KILL,
|
RMAppEventType.APP_REJECTED, RMAppEventType.KILL,
|
||||||
RMAppEventType.ATTEMPT_FINISHED, RMAppEventType.ATTEMPT_FAILED,
|
RMAppEventType.ATTEMPT_FINISHED, RMAppEventType.ATTEMPT_FAILED,
|
||||||
RMAppEventType.ATTEMPT_KILLED, RMAppEventType.NODE_UPDATE))
|
RMAppEventType.NODE_UPDATE))
|
||||||
|
|
||||||
.installTopology();
|
.installTopology();
|
||||||
|
|
||||||
|
@ -419,6 +433,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
case ACCEPTED:
|
case ACCEPTED:
|
||||||
case RUNNING:
|
case RUNNING:
|
||||||
case FINAL_SAVING:
|
case FINAL_SAVING:
|
||||||
|
case KILLING:
|
||||||
return FinalApplicationStatus.UNDEFINED;
|
return FinalApplicationStatus.UNDEFINED;
|
||||||
// finished without a proper final state is the same as failed
|
// finished without a proper final state is the same as failed
|
||||||
case FINISHING:
|
case FINISHING:
|
||||||
|
@ -681,7 +696,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
}
|
}
|
||||||
|
|
||||||
// No existent attempts means the attempt associated with this app was not
|
// No existent attempts means the attempt associated with this app was not
|
||||||
// started or started but not yet saved。
|
// started or started but not yet saved.
|
||||||
if (app.attempts.isEmpty()) {
|
if (app.attempts.isEmpty()) {
|
||||||
app.createNewAttempt(true);
|
app.createNewAttempt(true);
|
||||||
return RMAppState.SUBMITTED;
|
return RMAppState.SUBMITTED;
|
||||||
|
@ -811,7 +826,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event;
|
RMAppFailedAttemptEvent failedEvent = (RMAppFailedAttemptEvent) event;
|
||||||
diags = getAppAttemptFailedDiagnostics(failedEvent);
|
diags = getAppAttemptFailedDiagnostics(failedEvent);
|
||||||
break;
|
break;
|
||||||
case KILL:
|
case ATTEMPT_KILLED:
|
||||||
diags = getAppKilledDiagnostics();
|
diags = getAppKilledDiagnostics();
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -901,7 +916,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
private static class AppKilledTransition extends FinalTransition {
|
private static class AppKilledTransition extends FinalTransition {
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||||
app.diagnostics.append("Application killed by user.");
|
app.diagnostics.append(getAppKilledDiagnostics());
|
||||||
super.transition(app, event);
|
super.transition(app, event);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -910,15 +925,16 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
return "Application killed by user.";
|
return "Application killed by user.";
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class KillAppAndAttemptTransition extends AppKilledTransition {
|
private static class KillAttemptTransition extends RMAppTransition {
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||||
app.handler.handle(new RMAppAttemptEvent(app.currentAttempt.getAppAttemptId(),
|
app.stateBeforeKilling = app.getState();
|
||||||
RMAppAttemptEventType.KILL));
|
app.handler.handle(new RMAppAttemptEvent(app.currentAttempt
|
||||||
super.transition(app, event);
|
.getAppAttemptId(), RMAppAttemptEventType.KILL));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final class AppRejectedTransition extends
|
private static final class AppRejectedTransition extends
|
||||||
FinalTransition{
|
FinalTransition{
|
||||||
public void transition(RMAppImpl app, RMAppEvent event) {
|
public void transition(RMAppImpl app, RMAppEvent event) {
|
||||||
|
@ -986,7 +1002,7 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isAppSafeToUnregister() {
|
public boolean isAppSafeToTerminate() {
|
||||||
RMAppState state = getState();
|
RMAppState state = getState();
|
||||||
return state.equals(RMAppState.FINISHING)
|
return state.equals(RMAppState.FINISHING)
|
||||||
|| state.equals(RMAppState.FINISHED) || state.equals(RMAppState.FAILED)
|
|| state.equals(RMAppState.FINISHED) || state.equals(RMAppState.FAILED)
|
||||||
|
@ -1003,6 +1019,9 @@ public class RMAppImpl implements RMApp, Recoverable {
|
||||||
if (rmAppState.equals(RMAppState.FINAL_SAVING)) {
|
if (rmAppState.equals(RMAppState.FINAL_SAVING)) {
|
||||||
rmAppState = stateBeforeFinalSaving;
|
rmAppState = stateBeforeFinalSaving;
|
||||||
}
|
}
|
||||||
|
if (rmAppState.equals(RMAppState.KILLING)) {
|
||||||
|
rmAppState = stateBeforeKilling;
|
||||||
|
}
|
||||||
switch (rmAppState) {
|
switch (rmAppState) {
|
||||||
case NEW:
|
case NEW:
|
||||||
return YarnApplicationState.NEW;
|
return YarnApplicationState.NEW;
|
||||||
|
|
|
@ -28,5 +28,6 @@ public enum RMAppState {
|
||||||
FINISHING,
|
FINISHING,
|
||||||
FINISHED,
|
FINISHED,
|
||||||
FAILED,
|
FAILED,
|
||||||
|
KILLING,
|
||||||
KILLED
|
KILLED
|
||||||
}
|
}
|
||||||
|
|
|
@ -361,6 +361,8 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
|
||||||
RMAppAttemptEventType.UNREGISTERED,
|
RMAppAttemptEventType.UNREGISTERED,
|
||||||
RMAppAttemptEventType.STATUS_UPDATE,
|
RMAppAttemptEventType.STATUS_UPDATE,
|
||||||
RMAppAttemptEventType.CONTAINER_ALLOCATED,
|
RMAppAttemptEventType.CONTAINER_ALLOCATED,
|
||||||
|
// ignore Kill as we have already saved the final Finished state in
|
||||||
|
// state store.
|
||||||
RMAppAttemptEventType.KILL))
|
RMAppAttemptEventType.KILL))
|
||||||
|
|
||||||
// Transitions from FINISHED State
|
// Transitions from FINISHED State
|
||||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||||
|
@ -277,12 +278,10 @@ public class MockRM extends ResourceManager {
|
||||||
node.getState());
|
node.getState());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void killApp(ApplicationId appId) throws Exception {
|
public KillApplicationResponse killApp(ApplicationId appId) throws Exception {
|
||||||
ApplicationClientProtocol client = getClientRMService();
|
ApplicationClientProtocol client = getClientRMService();
|
||||||
KillApplicationRequest req = Records
|
KillApplicationRequest req = KillApplicationRequest.newInstance(appId);
|
||||||
.newRecord(KillApplicationRequest.class);
|
return client.forceKillApplication(req);
|
||||||
req.setApplicationId(appId);
|
|
||||||
client.forceKillApplication(req);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// from AMLauncher
|
// from AMLauncher
|
||||||
|
|
|
@ -54,6 +54,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.GetDelegationTokenResponse;
|
||||||
|
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;
|
||||||
import org.apache.hadoop.yarn.api.records.AMCommand;
|
import org.apache.hadoop.yarn.api.records.AMCommand;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
|
@ -76,8 +77,9 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState;
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.RMState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
|
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStoreEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
|
||||||
|
@ -414,10 +416,8 @@ public class TestRMRestart {
|
||||||
MockRM rm2 = new MockRM(conf, memStore);
|
MockRM rm2 = new MockRM(conf, memStore);
|
||||||
rm2.start();
|
rm2.start();
|
||||||
// assert the previous AM state is loaded back on RM recovery.
|
// assert the previous AM state is loaded back on RM recovery.
|
||||||
RMApp recoveredApp =
|
|
||||||
rm2.getRMContext().getRMApps().get(app0.getApplicationId());
|
rm2.waitForState(am0.getApplicationAttemptId(), RMAppAttemptState.FAILED);
|
||||||
Assert.assertEquals(RMAppAttemptState.FAILED, recoveredApp
|
|
||||||
.getAppAttempts().get(am0.getApplicationAttemptId()).getAppAttemptState());
|
|
||||||
rm1.stop();
|
rm1.stop();
|
||||||
rm2.stop();
|
rm2.stop();
|
||||||
}
|
}
|
||||||
|
@ -964,8 +964,8 @@ public class TestRMRestart {
|
||||||
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1),
|
Assert.assertEquals(BuilderUtils.newContainerId(attemptId1, 1),
|
||||||
attemptState.getMasterContainer().getId());
|
attemptState.getMasterContainer().getId());
|
||||||
|
|
||||||
// Setting AMLivelinessMonitor interval to be 10 Secs.
|
// Setting AMLivelinessMonitor interval to be 3 Secs.
|
||||||
conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 10000);
|
conf.setInt(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, 3000);
|
||||||
// start new RM
|
// start new RM
|
||||||
MockRM rm2 = new MockRM(conf, memStore);
|
MockRM rm2 = new MockRM(conf, memStore);
|
||||||
rm2.start();
|
rm2.start();
|
||||||
|
@ -1494,6 +1494,69 @@ public class TestRMRestart {
|
||||||
Assert.assertTrue(rm1.getServiceState() == STATE.STOPPED);
|
Assert.assertTrue(rm1.getServiceState() == STATE.STOPPED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is to test Killing application should be able to wait until app
|
||||||
|
// reaches killed state and also check that attempt state is saved before app
|
||||||
|
// state is saved.
|
||||||
|
@Test
|
||||||
|
public void testClientRetryOnKillingApplication() throws Exception {
|
||||||
|
MemoryRMStateStore memStore = new TestMemoryRMStateStore();
|
||||||
|
memStore.init(conf);
|
||||||
|
|
||||||
|
// start RM
|
||||||
|
MockRM rm1 = new MockRM(conf, memStore);
|
||||||
|
rm1.start();
|
||||||
|
MockNM nm1 =
|
||||||
|
new MockNM("127.0.0.1:1234", 15120, rm1.getResourceTrackerService());
|
||||||
|
nm1.registerNode();
|
||||||
|
|
||||||
|
RMApp app1 =
|
||||||
|
rm1.submitApp(200, "name", "user", null, false, "default", 1, null,
|
||||||
|
"myType");
|
||||||
|
MockAM am1 = launchAM(app1, rm1, nm1);
|
||||||
|
|
||||||
|
KillApplicationResponse response;
|
||||||
|
int count = 0;
|
||||||
|
while (true) {
|
||||||
|
response = rm1.killApp(app1.getApplicationId());
|
||||||
|
if (response.getIsKillCompleted()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Thread.sleep(100);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
// we expect at least 2 calls for killApp as the first killApp always return
|
||||||
|
// false.
|
||||||
|
Assert.assertTrue(count >= 1);
|
||||||
|
|
||||||
|
rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED);
|
||||||
|
rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED);
|
||||||
|
Assert.assertEquals(1, ((TestMemoryRMStateStore) memStore).updateAttempt);
|
||||||
|
Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateApp);
|
||||||
|
}
|
||||||
|
|
||||||
|
public class TestMemoryRMStateStore extends MemoryRMStateStore {
|
||||||
|
int count = 0;
|
||||||
|
public int updateApp = 0;
|
||||||
|
public int updateAttempt = 0;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateApplicationStateInternal(String appId,
|
||||||
|
ApplicationStateDataPBImpl appStateData) throws Exception {
|
||||||
|
updateApp = ++count;
|
||||||
|
super.updateApplicationStateInternal(appId, appStateData);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void
|
||||||
|
updateApplicationAttemptStateInternal(String attemptIdStr,
|
||||||
|
ApplicationAttemptStateDataPBImpl attemptStateData)
|
||||||
|
throws Exception {
|
||||||
|
updateAttempt = ++count;
|
||||||
|
super.updateApplicationAttemptStateInternal(attemptIdStr,
|
||||||
|
attemptStateData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static class TestSecurityMockRM extends MockRM {
|
public static class TestSecurityMockRM extends MockRM {
|
||||||
|
|
||||||
public TestSecurityMockRM(Configuration conf, RMStateStore store) {
|
public TestSecurityMockRM(Configuration conf, RMStateStore store) {
|
||||||
|
|
|
@ -145,7 +145,7 @@ public abstract class MockAsm extends MockApps {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isAppSafeToUnregister() {
|
public boolean isAppSafeToTerminate() {
|
||||||
throw new UnsupportedOperationException("Not supported yet.");
|
throw new UnsupportedOperationException("Not supported yet.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -218,7 +218,7 @@ public class MockRMApp implements RMApp {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean isAppSafeToUnregister() {
|
public boolean isAppSafeToTerminate() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -301,12 +301,9 @@ public class TestRMAppTransitions {
|
||||||
|
|
||||||
private void assertAppAndAttemptKilled(RMApp application)
|
private void assertAppAndAttemptKilled(RMApp application)
|
||||||
throws InterruptedException {
|
throws InterruptedException {
|
||||||
|
sendAttemptUpdateSavedEvent(application);
|
||||||
sendAppUpdateSavedEvent(application);
|
sendAppUpdateSavedEvent(application);
|
||||||
assertKilled(application);
|
assertKilled(application);
|
||||||
// send attempt final state saved event.
|
|
||||||
application.getCurrentAppAttempt().handle(
|
|
||||||
new RMAppAttemptUpdateSavedEvent(application.getCurrentAppAttempt()
|
|
||||||
.getAppAttemptId(), null));
|
|
||||||
Assert.assertEquals(RMAppAttemptState.KILLED, application
|
Assert.assertEquals(RMAppAttemptState.KILLED, application
|
||||||
.getCurrentAppAttempt().getAppAttemptState());
|
.getCurrentAppAttempt().getAppAttemptState());
|
||||||
assertAppFinalStateSaved(application);
|
assertAppFinalStateSaved(application);
|
||||||
|
@ -329,6 +326,12 @@ public class TestRMAppTransitions {
|
||||||
rmDispatcher.await();
|
rmDispatcher.await();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void sendAttemptUpdateSavedEvent(RMApp application) {
|
||||||
|
application.getCurrentAppAttempt().handle(
|
||||||
|
new RMAppAttemptUpdateSavedEvent(application.getCurrentAppAttempt()
|
||||||
|
.getAppAttemptId(), null));
|
||||||
|
}
|
||||||
|
|
||||||
protected RMApp testCreateAppNewSaving(
|
protected RMApp testCreateAppNewSaving(
|
||||||
ApplicationSubmissionContext submissionContext) throws IOException {
|
ApplicationSubmissionContext submissionContext) throws IOException {
|
||||||
RMApp application = createNewTestApp(submissionContext);
|
RMApp application = createNewTestApp(submissionContext);
|
||||||
|
@ -624,11 +627,12 @@ public class TestRMAppTransitions {
|
||||||
rmDispatcher.await();
|
rmDispatcher.await();
|
||||||
|
|
||||||
// Ignore Attempt_Finished if we were supposed to go to Finished.
|
// Ignore Attempt_Finished if we were supposed to go to Finished.
|
||||||
assertAppState(RMAppState.FINAL_SAVING, application);
|
assertAppState(RMAppState.KILLING, application);
|
||||||
RMAppEvent finishEvent =
|
RMAppEvent finishEvent =
|
||||||
new RMAppFinishedAttemptEvent(application.getApplicationId(), null);
|
new RMAppFinishedAttemptEvent(application.getApplicationId(), null);
|
||||||
application.handle(finishEvent);
|
application.handle(finishEvent);
|
||||||
assertAppState(RMAppState.FINAL_SAVING, application);
|
assertAppState(RMAppState.KILLING, application);
|
||||||
|
sendAttemptUpdateSavedEvent(application);
|
||||||
sendAppUpdateSavedEvent(application);
|
sendAppUpdateSavedEvent(application);
|
||||||
assertKilled(application);
|
assertKilled(application);
|
||||||
}
|
}
|
||||||
|
@ -686,8 +690,8 @@ public class TestRMAppTransitions {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testAppFinishingKill() throws IOException {
|
public void testAppAtFinishingIgnoreKill() throws IOException {
|
||||||
LOG.info("--- START: testAppFinishedFinished ---");
|
LOG.info("--- START: testAppAtFinishingIgnoreKill ---");
|
||||||
|
|
||||||
RMApp application = testCreateAppFinishing(null);
|
RMApp application = testCreateAppFinishing(null);
|
||||||
// FINISHING => FINISHED event RMAppEventType.KILL
|
// FINISHING => FINISHED event RMAppEventType.KILL
|
||||||
|
@ -695,7 +699,7 @@ public class TestRMAppTransitions {
|
||||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
||||||
application.handle(event);
|
application.handle(event);
|
||||||
rmDispatcher.await();
|
rmDispatcher.await();
|
||||||
assertAppState(RMAppState.FINISHED, application);
|
assertAppState(RMAppState.FINISHING, application);
|
||||||
}
|
}
|
||||||
|
|
||||||
// While App is at FINAL_SAVING, Attempt_Finished event may come before
|
// While App is at FINAL_SAVING, Attempt_Finished event may come before
|
||||||
|
@ -780,6 +784,7 @@ public class TestRMAppTransitions {
|
||||||
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
||||||
application.handle(event);
|
application.handle(event);
|
||||||
rmDispatcher.await();
|
rmDispatcher.await();
|
||||||
|
sendAttemptUpdateSavedEvent(application);
|
||||||
sendAppUpdateSavedEvent(application);
|
sendAppUpdateSavedEvent(application);
|
||||||
assertTimesAtFinish(application);
|
assertTimesAtFinish(application);
|
||||||
assertAppState(RMAppState.KILLED, application);
|
assertAppState(RMAppState.KILLED, application);
|
||||||
|
@ -801,14 +806,6 @@ public class TestRMAppTransitions {
|
||||||
assertTimesAtFinish(application);
|
assertTimesAtFinish(application);
|
||||||
assertAppState(RMAppState.KILLED, application);
|
assertAppState(RMAppState.KILLED, application);
|
||||||
|
|
||||||
// KILLED => KILLED event RMAppEventType.ATTEMPT_KILLED
|
|
||||||
event =
|
|
||||||
new RMAppEvent(application.getApplicationId(),
|
|
||||||
RMAppEventType.ATTEMPT_KILLED);
|
|
||||||
application.handle(event);
|
|
||||||
rmDispatcher.await();
|
|
||||||
assertTimesAtFinish(application);
|
|
||||||
assertAppState(RMAppState.KILLED, application);
|
|
||||||
|
|
||||||
// KILLED => KILLED event RMAppEventType.KILL
|
// KILLED => KILLED event RMAppEventType.KILL
|
||||||
event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
|
||||||
|
|
Loading…
Reference in New Issue