svn merge -c 1588559 FIXES: MAPREDUCE-4937. MR AM handles an oversized split metainfo file poorly. Contributed by Eric Payne

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1588561 13f79535-47bb-0310-9956-ffa450edef68
Author: Jason Darrell Lowe
Date:   2014-04-18 20:40:00 +00:00
Parent: ccec38b801
Commit: c8d5d1f82a

5 changed files with 75 additions and 17 deletions
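Context for the change: during job init the AM reads the split metainfo file, and an oversized file is rejected with an IOException that JobImpl.InitTransition.createSplits() rethrows wrapped in an unchecked YarnRuntimeException (the new test below relies on exactly this). Before this patch, InitTransition caught only IOException, so the wrapped exception escaped the init path and the job failed without a clean abort or useful diagnostics. A minimal, self-contained sketch of that pattern follows; the class is a toy stand-in, not Hadoop code, and the exception message is illustrative:

import java.io.IOException;

// Toy stand-in for the failure mode fixed here: an IOException wrapped in
// an unchecked exception slips past a catch (IOException) handler. Names
// mirror the diff; this is not the real Hadoop code.
public class WrappedInitFailureDemo {

  static class YarnRuntimeException extends RuntimeException {
    YarnRuntimeException(String message) { super(message); }
  }

  // Plays the role of InitTransition.createSplits(), which rethrows the
  // split-metainfo reader's IOException as an unchecked YarnRuntimeException.
  static void createSplits() throws IOException {
    throw new YarnRuntimeException("Split metadata size exceeded");
  }

  public static void main(String[] args) {
    try {
      createSplits();
    } catch (IOException e) {
      // Pre-patch handler: never reached, because YarnRuntimeException is
      // unchecked, so the error escapes the init path entirely.
      System.out.println("handled: " + e.getMessage());
    }
  }
}

Running it ends with an uncaught YarnRuntimeException. The patch widens the catch to Exception and converts the failure into a JOB_INIT_FAILED event instead, as the diffs below show.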

CHANGES.txt

@@ -41,6 +41,9 @@ Release 2.5.0 - UNRELEASED
     MAPREDUCE-5775. Remove unnecessary job.setNumReduceTasks in SleepJob.createJob
     (jhanver chand sharma via devaraj)
 
+    MAPREDUCE-4937. MR AM handles an oversized split metainfo file poorly
+    (Eric Payne via jlowe)
+
 Release 2.4.1 - UNRELEASED
 
   INCOMPATIBLE CHANGES

MRAppMaster.java

@@ -1040,6 +1040,7 @@ public class MRAppMaster extends CompositeService {
     // It's more test friendly to put it here.
     DefaultMetricsSystem.initialize("MRAppMaster");
 
+    boolean initFailed = false;
     if (!errorHappenedShutDown) {
 
       // create a job event for job intialization
       JobEvent initJobEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT);
@@ -1048,6 +1049,10 @@ public class MRAppMaster extends CompositeService {
       // job-init to be done completely here.
       jobEventDispatcher.handle(initJobEvent);
 
+      // If job is still not initialized, an error happened during
+      // initialization. Must complete starting all of the services so failure
+      // events can be processed.
+      initFailed = (((JobImpl)job).getInternalState() != JobStateInternal.INITED);
 
       // JobImpl's InitTransition is done (call above is synchronous), so the
       // "uber-decision" (MR-1220) has been made.  Query job and switch to
@@ -1076,8 +1081,14 @@ public class MRAppMaster extends CompositeService {
     // set job classloader if configured
     MRApps.setJobClassLoader(getConfig());
 
-    // All components have started, start the job.
-    startJobs();
+    if (initFailed) {
+      JobEvent initFailedEvent = new JobEvent(job.getID(), JobEventType.JOB_INIT_FAILED);
+      jobEventDispatcher.handle(initFailedEvent);
+    } else {
+      // All components have started, start the job.
+      startJobs();
+    }
   }
 
   @Override

JobEventType.java

@@ -28,6 +28,7 @@ public enum JobEventType {
 
   //Producer:MRAppMaster
   JOB_INIT,
+  JOB_INIT_FAILED,
   JOB_START,
 
   //Producer:Task

JobImpl.java

@@ -250,9 +250,12 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
           JobEventType.JOB_COUNTER_UPDATE, COUNTER_UPDATE_TRANSITION)
       .addTransition
           (JobStateInternal.NEW,
-          EnumSet.of(JobStateInternal.INITED, JobStateInternal.FAILED),
+          EnumSet.of(JobStateInternal.INITED, JobStateInternal.NEW),
           JobEventType.JOB_INIT,
           new InitTransition())
+      .addTransition(JobStateInternal.NEW, JobStateInternal.FAIL_ABORT,
+          JobEventType.JOB_INIT_FAILED,
+          new InitFailedTransition())
       .addTransition(JobStateInternal.NEW, JobStateInternal.KILLED,
           JobEventType.JOB_KILL,
           new KillNewJobTransition())
@@ -265,7 +268,7 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
       // Ignore-able events
       .addTransition(JobStateInternal.NEW, JobStateInternal.NEW,
           JobEventType.JOB_UPDATED_NODES)
 
       // Transitions from INITED state
       .addTransition(JobStateInternal.INITED, JobStateInternal.INITED,
           JobEventType.JOB_DIAGNOSTIC_UPDATE,
@@ -1374,6 +1377,15 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
     public JobStateInternal transition(JobImpl job, JobEvent event) {
       job.metrics.submittedJob(job);
       job.metrics.preparingJob(job);
+
+      if (job.newApiCommitter) {
+        job.jobContext = new JobContextImpl(job.conf,
+            job.oldJobId);
+      } else {
+        job.jobContext = new org.apache.hadoop.mapred.JobContextImpl(
+            job.conf, job.oldJobId);
+      }
+
       try {
         setup(job);
         job.fs = job.getFileSystem(job.conf);
@@ -1409,14 +1421,6 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
         checkTaskLimits();
 
-        if (job.newApiCommitter) {
-          job.jobContext = new JobContextImpl(job.conf,
-              job.oldJobId);
-        } else {
-          job.jobContext = new org.apache.hadoop.mapred.JobContextImpl(
-              job.conf, job.oldJobId);
-        }
-
         long inputLength = 0;
         for (int i = 0; i < job.numMapTasks; ++i) {
           inputLength += taskSplitMetaInfo[i].getInputDataLength();
@@ -1443,15 +1447,14 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
         job.metrics.endPreparingJob(job);
         return JobStateInternal.INITED;
-      } catch (IOException e) {
+      } catch (Exception e) {
         LOG.warn("Job init failed", e);
         job.metrics.endPreparingJob(job);
         job.addDiagnostic("Job init failed : "
             + StringUtils.stringifyException(e));
-        job.eventHandler.handle(new CommitterJobAbortEvent(job.jobId,
-            job.jobContext,
-            org.apache.hadoop.mapreduce.JobStatus.State.FAILED));
-        return JobStateInternal.FAILED;
+        // Leave job in the NEW state. The MR AM will detect that the state is
+        // not INITED and send a JOB_INIT_FAILED event.
+        return JobStateInternal.NEW;
       }
     }
@@ -1552,6 +1555,16 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job,
       }
     }
   } // end of InitTransition
 
+  private static class InitFailedTransition
+      implements SingleArcTransition<JobImpl, JobEvent> {
+    @Override
+    public void transition(JobImpl job, JobEvent event) {
+      job.eventHandler.handle(new CommitterJobAbortEvent(job.jobId,
+          job.jobContext,
+          org.apache.hadoop.mapreduce.JobStatus.State.FAILED));
+    }
+  }
+
   private static class SetupCompletedTransition
       implements SingleArcTransition<JobImpl, JobEvent> {
     @Override
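Taken together, the JobImpl changes split the failure handling in two: InitTransition now reports failure by returning to the NEW state, and the committer abort moves into the new InitFailedTransition, reached via the JOB_INIT_FAILED event that MRAppMaster sends once all services are up. Moving the jobContext construction ahead of the try block guarantees InitFailedTransition a non-null context for CommitterJobAbortEvent even when setup(job) itself threw. A compact sketch of the resulting control flow; plain method calls stand in for the real asynchronous state machine, and the names are simplified:

// Toy model of the post-patch init-failure flow. Enum values echo the diff;
// the dispatch below is ordinary method calls, not the YARN state machine.
public class InitFailureFlowDemo {

  enum JobStateInternal { NEW, INITED, FAIL_ABORT }

  // InitTransition: on any failure, record diagnostics and stay in NEW.
  static JobStateInternal initTransition(boolean splitsTooBig) {
    try {
      if (splitsTooBig) {
        throw new RuntimeException("Split metadata size exceeded");
      }
      return JobStateInternal.INITED;
    } catch (Exception e) {
      System.out.println("Job init failed : " + e.getMessage());
      return JobStateInternal.NEW;
    }
  }

  // InitFailedTransition: abort the committer, then fail the job.
  static JobStateInternal initFailedTransition() {
    System.out.println("CommitterJobAbortEvent(FAILED)");
    return JobStateInternal.FAIL_ABORT;
  }

  public static void main(String[] args) {
    JobStateInternal state = initTransition(true);     // JOB_INIT
    boolean initFailed = (state != JobStateInternal.INITED);
    if (initFailed) {
      state = initFailedTransition();                  // JOB_INIT_FAILED
    }
    System.out.println("final state: " + state);
  }
}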

TestJobImpl.java

@@ -81,6 +81,7 @@ import org.apache.hadoop.yarn.event.Dispatcher;
 import org.apache.hadoop.yarn.event.DrainDispatcher;
 import org.apache.hadoop.yarn.event.EventHandler;
 import org.apache.hadoop.yarn.event.InlineDispatcher;
+import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
 import org.apache.hadoop.yarn.state.StateMachine;
 import org.apache.hadoop.yarn.state.StateMachineFactory;
 import org.apache.hadoop.yarn.util.ConverterUtils;
@@ -728,6 +729,35 @@ public class TestJobImpl {
     commitHandler.stop();
   }
 
+  static final String EXCEPTIONMSG = "Splits max exceeded";
+
+  @Test
+  public void testMetaInfoSizeOverMax() throws Exception {
+    Configuration conf = new Configuration();
+    JobID jobID = JobID.forName("job_1234567890000_0001");
+    JobId jobId = TypeConverter.toYarn(jobID);
+    MRAppMetrics mrAppMetrics = MRAppMetrics.create();
+    JobImpl job =
+        new JobImpl(jobId, ApplicationAttemptId.newInstance(
+            ApplicationId.newInstance(0, 0), 0), conf, mock(EventHandler.class),
+            null, new JobTokenSecretManager(), new Credentials(), null, null,
+            mrAppMetrics, null, true, null, 0, null, null, null, null);
+    InitTransition initTransition = new InitTransition() {
+      @Override
+      protected TaskSplitMetaInfo[] createSplits(JobImpl job, JobId jobId) {
+        throw new YarnRuntimeException(EXCEPTIONMSG);
+      }
+    };
+    JobEvent mockJobEvent = mock(JobEvent.class);
+
+    JobStateInternal jobSI = initTransition.transition(job, mockJobEvent);
+    Assert.assertTrue("When init fails, return value from InitTransition.transition should equal NEW.",
+        jobSI.equals(JobStateInternal.NEW));
+    Assert.assertTrue("Job diagnostics should contain YarnRuntimeException",
+        job.getDiagnostics().toString().contains("YarnRuntimeException"));
+    Assert.assertTrue("Job diagnostics should contain " + EXCEPTIONMSG,
+        job.getDiagnostics().toString().contains(EXCEPTIONMSG));
+  }
+
   private static CommitterEventHandler createCommitterEventHandler(
       Dispatcher dispatcher, OutputCommitter committer) {
     final SystemClock clock = new SystemClock();