From 22a0f31d665795beeaecbd37acbe11e1e88a623a Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Wed, 12 Feb 2014 16:08:02 +0000 Subject: [PATCH] MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job. (Jason Lowe via kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1567669 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../jobhistory/JobHistoryParser.java | 6 ++- .../v2/hs/TestJobHistoryParsing.java | 41 ++++++++++++++++++- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 3b1fd05da51..c848dc51499 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -19,6 +19,9 @@ Release 2.4.0 - UNRELEASED OPTIMIZATIONS BUG FIXES + + MAPREDUCE-5746. Job diagnostics can implicate wrong task for a failed job. + (Jason Lowe via kasha) Release 2.3.1 - UNRELEASED diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java index 9d6f579c44d..19e2a51a132 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryParser.java @@ -353,8 +353,10 @@ private void handleTaskFailedEvent(TaskFailedEvent event) { taskInfo.error = StringInterner.weakIntern(event.getError()); taskInfo.failedDueToAttemptId = event.getFailedAttemptID(); taskInfo.counters = event.getCounters(); - info.errorInfo = "Task " + taskInfo.taskId +" failed " + - taskInfo.attemptsMap.size() + " times "; + if (info.errorInfo.isEmpty()) { + info.errorInfo = "Task " + taskInfo.taskId + " failed " + + taskInfo.attemptsMap.size() + " times "; + } } private void handleTaskStartedEvent(TaskStartedEvent event) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java index 4551b61e198..7893dbf5f04 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java @@ -41,6 +41,8 @@ import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskID; import org.apache.hadoop.mapreduce.TypeConverter; @@ -52,7 +54,9 @@ import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.JobInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskAttemptInfo; import org.apache.hadoop.mapreduce.jobhistory.JobHistoryParser.TaskInfo; +import org.apache.hadoop.mapreduce.jobhistory.TaskFailedEvent; import org.apache.hadoop.mapreduce.jobhistory.TaskFinishedEvent; +import org.apache.hadoop.mapreduce.jobhistory.TaskStartedEvent; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; @@ -70,7 +74,6 @@ import org.apache.hadoop.mapreduce.v2.hs.HistoryFileManager.HistoryFileInfo; import org.apache.hadoop.mapreduce.v2.hs.TestJobHistoryEvents.MRAppWithHistory; import org.apache.hadoop.mapreduce.v2.hs.webapp.dao.JobsInfo; -import org.apache.hadoop.mapreduce.v2.jobhistory.FileNameIndexUtils; import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils; import org.apache.hadoop.mapreduce.v2.jobhistory.JobIndexInfo; import org.apache.hadoop.net.DNSToSwitchMapping; @@ -726,4 +729,40 @@ public void testPartialJob() throws Exception { assertNull(test.getAMInfos()); } + + @Test + public void testMultipleFailedTasks() throws Exception { + JobHistoryParser parser = + new JobHistoryParser(Mockito.mock(FSDataInputStream.class)); + EventReader reader = Mockito.mock(EventReader.class); + final AtomicInteger numEventsRead = new AtomicInteger(0); // Hack! + final org.apache.hadoop.mapreduce.TaskType taskType = + org.apache.hadoop.mapreduce.TaskType.MAP; + final TaskID[] tids = new TaskID[2]; + JobID jid = new JobID("1", 1); + tids[0] = new TaskID(jid, taskType, 0); + tids[1] = new TaskID(jid, taskType, 1); + Mockito.when(reader.getNextEvent()).thenAnswer( + new Answer() { + public HistoryEvent answer(InvocationOnMock invocation) + throws IOException { + // send two task start and two task fail events for tasks 0 and 1 + int eventId = numEventsRead.getAndIncrement(); + TaskID tid = tids[eventId & 0x1]; + if (eventId < 2) { + return new TaskStartedEvent(tid, 0, taskType, ""); + } + if (eventId < 4) { + TaskFailedEvent tfe = new TaskFailedEvent(tid, 0, taskType, + "failed", "FAILED", null, new Counters()); + tfe.setDatum(tfe.getDatum()); + return tfe; + } + return null; + } + }); + JobInfo info = parser.parse(reader); + assertTrue("Task 0 not implicated", + info.getErrorInfo().contains(tids[0].toString())); + } }