From ce778462ee5426421edb679f091e31e4b6fc1ef5 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Mon, 13 Feb 2012 21:12:03 +0000 Subject: [PATCH] MAPREDUCE-3837. Job tracker is not able to recover jobs after crash. Contributed by Mayank Bansal. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1243695 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/mapred/JobTracker.java | 14 +++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index d9f76e09d6a..b24cd3ffab6 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -2682,6 +2682,9 @@ Release 0.22.1 - Unreleased BUG FIXES + MAPREDUCE-3837. Job tracker is not able to recover jobs after crash. + (Mayank Bansal via shv) + Release 0.22.0 - 2011-11-29 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java index 091bf35a0b3..609d8824e85 100644 --- a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java +++ b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java @@ -1192,13 +1192,17 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, try { Path jobInfoFile = getSystemFileForJob(jobId); FSDataInputStream in = fs.open(jobInfoFile); - JobInfo token = new JobInfo(); + final JobInfo token = new JobInfo(); token.readFields(in); in.close(); - UserGroupInformation ugi = - UserGroupInformation.createRemoteUser(token.getUser().toString()); - submitJob(token.getJobID(), restartCount, - ugi, token.getJobSubmitDir().toString(), true, null); + final UserGroupInformation ugi = + UserGroupInformation.createRemoteUser(token.getUser().toString()); + ugi.doAs(new PrivilegedExceptionAction<JobStatus>() { + public JobStatus run() throws 
IOException ,InterruptedException{ + return submitJob(token.getJobID(), restartCount, + ugi, token.getJobSubmitDir().toString(), true, null); + }}); + recovered++; } catch (Exception e) { LOG.warn("Could not recover job " + jobId, e);