MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job. (Jason Lowe via kasha)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1567669 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ee0d2f6eab
commit
22a0f31d66
|
@ -19,6 +19,9 @@ Release 2.4.0 - UNRELEASED
|
|||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job.
|
||||
(Jason Lowe via kasha)
|
||||
|
||||
Release 2.3.1 - UNRELEASED
|
||||
|
||||
|
|
|
@ -353,8 +353,10 @@ public class JobHistoryParser implements HistoryEventHandler {
|
|||
taskInfo.error = StringInterner.weakIntern(event.getError());
|
||||
taskInfo.failedDueToAttemptId = event.getFailedAttemptID();
|
||||
taskInfo.counters = event.getCounters();
|
||||
info.errorInfo = "Task " + taskInfo.taskId +" failed " +
|
||||
taskInfo.attemptsMap.size() + " times ";
|
||||
if (info.errorInfo.isEmpty()) {
|
||||
info.errorInfo = "Task " + taskInfo.taskId + " failed " +
|
||||
taskInfo.attemptsMap.size() + " times ";
|
||||
}
|
||||
}
|
||||
|
||||
private void handleTaskStartedEvent(TaskStartedEvent event) {
|
||||
|
|
|
@ -41,6 +41,8 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
|||
import org.apache.hadoop.fs.FSDataInputStream;
|
||||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.mapreduce.Counters;
|
||||
import org.apache.hadoop.mapreduce.JobID;
|
||||
import org.apache.hadoop.mapreduce.MRJobConfig;
|
||||
import org.apache.hadoop.mapreduce.TaskID;
|
||||
import org.apache.hadoop.mapreduce.TypeConverter;
|
||||
|
@ -52,7 +54,9 @@ import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.AMInfo;
|
|||
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent;
|
||||
import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
|
||||
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
|
||||
|
@ -70,7 +74,6 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
|
|||
import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo;
|
||||
import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory;
|
||||
import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
|
||||
import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo;
|
||||
import org.apache.hadoop.net.DNSToSwitchMapping;
|
||||
|
@ -726,4 +729,40 @@ public class TestJobHistoryParsing {
|
|||
assertNull(test.getAMInfos());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMultipleFailedTasks() throws Exception {
|
||||
JobHistoryParser parser =
|
||||
new JobHistoryParser(Mockito.mock(FSDataInputStream.class));
|
||||
EventReader reader = Mockito.mock(EventReader.class);
|
||||
final AtomicInteger numEventsRead = new AtomicInteger(0); // Hack!
|
||||
final org.apache.hadoop.mapreduce.TaskType taskType =
|
||||
org.apache.hadoop.mapreduce.TaskType.MAP;
|
||||
final TaskID[] tids = new TaskID[2];
|
||||
JobID jid = new JobID("1", 1);
|
||||
tids[0] = new TaskID(jid, taskType, 0);
|
||||
tids[1] = new TaskID(jid, taskType, 1);
|
||||
Mockito.when(reader.getNextEvent()).thenAnswer(
|
||||
new Answer<HistoryEvent>() {
|
||||
public HistoryEvent answer(InvocationOnMock invocation)
|
||||
throws IOException {
|
||||
// send two task start and two task fail events for tasks 0 and 1
|
||||
int eventId = numEventsRead.getAndIncrement();
|
||||
TaskID tid = tids[eventId & 0x1];
|
||||
if (eventId < 2) {
|
||||
return new TaskStartedEvent(tid, 0, taskType, "");
|
||||
}
|
||||
if (eventId < 4) {
|
||||
TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType,
|
||||
"failed", "FAILED", null, new Counters());
|
||||
tfe.setDatum(tfe.getDatum());
|
||||
return tfe;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
});
|
||||
JobInfo info = parser.parse(reader);
|
||||
assertTrue("Task 0 not implicated",
|
||||
info.getErrorInfo().contains(tids[0].toString()));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue