YARN-139. Interrupted Exception within AsyncDispatcher leads to user confusion. Contributed by Vinod Kumar Vavilapalli

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1401726 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jason Darrell Lowe 2012-10-24 15:20:58 +00:00
parent 90ba993bc7
commit 614a743fea
4 changed files with 68 additions and 52 deletions

View File

@ -107,6 +107,8 @@ import org.apache.hadoop.yarn.service.CompositeService;
import org.apache.hadoop.yarn.service.Service;
import org.apache.hadoop.yarn.util.ConverterUtils;
import com.google.common.annotations.VisibleForTesting;
/**
* The Map-Reduce Application Master.
* The state machine is encapsulated in the implementation of Job interface.
@ -398,52 +400,65 @@ public class MRAppMaster extends CompositeService {
protected void sysexit() {
System.exit(0);
}
@VisibleForTesting
public void shutDownJob() {
// job has finished
// this is the only job, so shut down the Appmaster
// note in a workflow scenario, this may lead to creation of a new
// job (FIXME?)
// Send job-end notification
if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) {
try {
LOG.info("Job end notification started for jobID : "
+ job.getReport().getJobId());
JobEndNotifier notifier = new JobEndNotifier();
notifier.setConf(getConfig());
notifier.notify(job.getReport());
} catch (InterruptedException ie) {
LOG.warn("Job end notification interrupted for jobID : "
+ job.getReport().getJobId(), ie);
}
}
// TODO:currently just wait for some time so clients can know the
// final states. Will be removed once RM come on.
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace();
}
try {
//We are finishing cleanly so this is the last retry
isLastAMRetry = true;
// Stop all services
// This will also send the final report to the ResourceManager
LOG.info("Calling stop for all the services");
MRAppMaster.this.stop();
} catch (Throwable t) {
LOG.warn("Graceful stop failed ", t);
}
//Bring the process down by force.
//Not needed after HADOOP-7140
LOG.info("Exiting MR AppMaster..GoodBye!");
sysexit();
}
private class JobFinishEventHandler implements EventHandler<JobFinishEvent> {
@Override
public void handle(JobFinishEvent event) {
// job has finished
// this is the only job, so shut down the Appmaster
// note in a workflow scenario, this may lead to creation of a new
// job (FIXME?)
// Send job-end notification
if (getConfig().get(MRJobConfig.MR_JOB_END_NOTIFICATION_URL) != null) {
try {
LOG.info("Job end notification started for jobID : "
+ job.getReport().getJobId());
JobEndNotifier notifier = new JobEndNotifier();
notifier.setConf(getConfig());
notifier.notify(job.getReport());
} catch (InterruptedException ie) {
LOG.warn("Job end notification interrupted for jobID : "
+ job.getReport().getJobId(), ie);
// Create a new thread to shutdown the AM. We should not do it in-line
// to avoid blocking the dispatcher itself.
new Thread() {
@Override
public void run() {
shutDownJob();
}
}
// TODO:currently just wait for some time so clients can know the
// final states. Will be removed once RM come on.
try {
Thread.sleep(5000);
} catch (InterruptedException e) {
e.printStackTrace();
}
try {
//We are finishing cleanly so this is the last retry
isLastAMRetry = true;
// Stop all services
// This will also send the final report to the ResourceManager
LOG.info("Calling stop for all the services");
stop();
} catch (Throwable t) {
LOG.warn("Graceful stop failed ", t);
}
//Bring the process down by force.
//Not needed after HADOOP-7140
LOG.info("Exiting MR AppMaster..GoodBye!");
sysexit();
}.start();
}
}

View File

@ -21,17 +21,15 @@ package org.apache.hadoop.mapreduce.v2.app;
import static org.mockito.Matchers.any;
import static org.mockito.Matchers.anyBoolean;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.mockito.Mockito.times;
import java.io.IOException;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@ -49,7 +47,6 @@ import org.apache.hadoop.yarn.YarnException;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.service.AbstractService;
@ -68,7 +65,6 @@ import org.junit.Test;
private Path stagingJobPath = new Path(stagingJobDir);
private final static RecordFactory recordFactory = RecordFactoryProvider.
getRecordFactory(null);
private static final Log LOG = LogFactory.getLog(TestStagingCleanup.class);
@Test
public void testDeletionofStaging() throws IOException {
@ -86,9 +82,7 @@ import org.junit.Test;
jobid.setAppId(appId);
MRAppMaster appMaster = new TestMRApp(attemptId);
appMaster.init(conf);
EventHandler<JobFinishEvent> handler =
appMaster.createJobFinishEventHandler();
handler.handle(new JobFinishEvent(jobid));
appMaster.shutDownJob();
verify(fs).delete(stagingJobPath, true);
}

View File

@ -182,6 +182,9 @@ Release 0.23.5 - UNRELEASED
YARN-180. Capacity scheduler - containers that get reserved create
container token to early (acmurthy and bobby)
YARN-139. Interrupted Exception within AsyncDispatcher leads to user
confusion. (Vinod Kumar Vavilapalli via jlowe)
Release 0.23.4 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -68,7 +68,9 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher {
try {
event = eventQueue.take();
} catch(InterruptedException ie) {
LOG.warn("AsyncDispatcher thread interrupted", ie);
if (!stopped) {
LOG.warn("AsyncDispatcher thread interrupted", ie);
}
return;
}
if (event != null) {
@ -180,7 +182,9 @@ public class AsyncDispatcher extends AbstractService implements Dispatcher {
try {
eventQueue.put(event);
} catch (InterruptedException e) {
LOG.warn("AsyncDispatcher thread interrupted", e);
if (!stopped) {
LOG.warn("AsyncDispatcher thread interrupted", e);
}
throw new YarnException(e);
}
};