YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that was causing unmanaged AMs to not finish correctly. Contributed by haosdent.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1554886 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2014-01-02 19:54:07 +00:00
parent fe458a1e3a
commit bb2e2fee60
3 changed files with 24 additions and 4 deletions

View File

@ -280,6 +280,9 @@ Release 2.4.0 - UNRELEASED
YARN-1522. Fixed a race condition in the test TestApplicationCleanup that was YARN-1522. Fixed a race condition in the test TestApplicationCleanup that was
causing it to randomly fail. (Liyin Liang via vinodkv) causing it to randomly fail. (Liyin Liang via vinodkv)
YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that
was causing unmanaged AMs to not finish correctly. (haosdent via vinodkv)
Release 2.3.0 - UNRELEASED Release 2.3.0 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -29,12 +29,14 @@ import java.io.OutputStream;
import java.net.URL; import java.net.URL;
import junit.framework.Assert; import junit.framework.Assert;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest;
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.client.ClientRMProxy; import org.apache.hadoop.yarn.client.ClientRMProxy;
@ -193,8 +195,10 @@ public class TestUnmanagedAMLauncher {
client.registerApplicationMaster(RegisterApplicationMasterRequest client.registerApplicationMaster(RegisterApplicationMasterRequest
.newInstance(NetUtils.getHostname(), -1, "")); .newInstance(NetUtils.getHostname(), -1, ""));
Thread.sleep(1000); Thread.sleep(1000);
FinishApplicationMasterResponse resp =
client.finishApplicationMaster(FinishApplicationMasterRequest client.finishApplicationMaster(FinishApplicationMasterRequest
.newInstance(FinalApplicationStatus.SUCCEEDED, "success", null)); .newInstance(FinalApplicationStatus.SUCCEEDED, "success", null));
assertTrue(resp.getIsUnregistered());
System.exit(0); System.exit(0);
} else { } else {
System.exit(1); System.exit(1);

View File

@ -292,8 +292,21 @@ public class ApplicationMasterService extends AbstractService implements
this.amLivelinessMonitor.receivedPing(applicationAttemptId); this.amLivelinessMonitor.receivedPing(applicationAttemptId);
if (rmContext.getRMApps().get(applicationAttemptId.getApplicationId()) RMApp rmApp =
.isAppSafeToTerminate()) { rmContext.getRMApps().get(applicationAttemptId.getApplicationId());
if (rmApp.getApplicationSubmissionContext().getUnmanagedAM()) {
// No recovery supported yet for unmanaged AM. Send the unregister event
// and (falsely) acknowledge state-store write immediately.
rmContext.getDispatcher().getEventHandler().handle(
new RMAppAttemptUnregistrationEvent(applicationAttemptId, request
.getTrackingUrl(), request.getFinalApplicationStatus(), request
.getDiagnostics()));
return FinishApplicationMasterResponse.newInstance(true);
}
// Not an unmanaged-AM.
if (rmApp.isAppSafeToTerminate()) {
return FinishApplicationMasterResponse.newInstance(true); return FinishApplicationMasterResponse.newInstance(true);
} else { } else {
// keep sending the unregister event as RM may crash in the meanwhile. // keep sending the unregister event as RM may crash in the meanwhile.