From 10c7f0eb1526b8d66b79138d387243746d85e0f5 Mon Sep 17 00:00:00 2001 From: Konstantin Shvachko Date: Mon, 13 Feb 2012 21:16:22 +0000 Subject: [PATCH] MAPREDUCE-3837. Job tracker is not able to recover jobs after crash. Contributed by Mayank Bansal. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1243698 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/mapred/JobTracker.java | 14 +++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index c045b0f1100..d084f61278e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -2649,6 +2649,9 @@ Release 0.22.1 - Unreleased BUG FIXES + MAPREDUCE-3837. Job tracker is not able to recover jobs after crash. + (Mayank Bansal via shv) + Release 0.22.0 - 2011-11-29 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java index 239038f707e..8a07128cc78 100644 --- a/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java +++ b/hadoop-mapreduce-project/src/java/org/apache/hadoop/mapred/JobTracker.java @@ -1192,13 +1192,17 @@ public class JobTracker implements MRConstants, InterTrackerProtocol, try { Path jobInfoFile = getSystemFileForJob(jobId); FSDataInputStream in = fs.open(jobInfoFile); - JobInfo token = new JobInfo(); + final JobInfo token = new JobInfo(); token.readFields(in); in.close(); - UserGroupInformation ugi = - UserGroupInformation.createRemoteUser(token.getUser().toString()); - submitJob(token.getJobID(), restartCount, - ugi, token.getJobSubmitDir().toString(), true, null); + final UserGroupInformation ugi = + UserGroupInformation.createRemoteUser(token.getUser().toString()); + ugi.doAs(new PrivilegedExceptionAction<JobStatus>() { + public JobStatus 
run() throws IOException ,InterruptedException{ + return submitJob(token.getJobID(), restartCount, + ugi, token.getJobSubmitDir().toString(), true, null); + }}); + recovered++; } catch (Exception e) { LOG.warn("Could not recover job " + jobId, e);