From 2d125f2beea23eefe5f8b2da9a6aa91c9e29d469 Mon Sep 17 00:00:00 2001 From: Zhihai Xu Date: Tue, 15 Dec 2015 00:58:23 -0800 Subject: [PATCH] MAPREDUCE-6436. JobHistory cache issue. Contributed by Kai Sasaki (cherry picked from commit 5b7078d06921893200163a3d29c8901c3c0107cb) --- .../mapreduce/v2/hs/HistoryFileManager.java | 37 +++++++++++++++++-- hadoop-yarn-project/CHANGES.txt | 2 + 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java index 84f28baf2ee..4adca4528a7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java @@ -219,13 +219,21 @@ public class HistoryFileManager extends AbstractService { // keeping the cache size exactly at the maximum. Iterator keys = cache.navigableKeySet().iterator(); long cutoff = System.currentTimeMillis() - maxAge; + + // MAPREDUCE-6436: In order to reduce the number of logs written + // in case of a lot of move pending histories. + JobId firstInIntermediateKey = null; + int inIntermediateCount = 0; + JobId firstMoveFailedKey = null; + int moveFailedCount = 0; + while(cache.size() > maxSize && keys.hasNext()) { JobId key = keys.next(); HistoryFileInfo firstValue = cache.get(key); if(firstValue != null) { synchronized(firstValue) { if (firstValue.isMovePending()) { - if(firstValue.didMoveFail() && + if(firstValue.didMoveFail() && firstValue.jobIndexInfo.getFinishTime() <= cutoff) { cache.remove(key); //Now lets try to delete it @@ -236,8 +244,17 @@ public class HistoryFileManager extends AbstractService { " that could not be moved to done.", e); } } else { - LOG.warn("Waiting to remove " + key - + " from JobListCache because it is not in done yet."); + if (firstValue.didMoveFail()) { + if (moveFailedCount == 0) { + firstMoveFailedKey = key; + } + moveFailedCount += 1; + } else { + if (inIntermediateCount == 0) { + firstInIntermediateKey = key; + } + inIntermediateCount += 1; + } } } else { cache.remove(key); @@ -245,6 +262,20 @@ public class HistoryFileManager extends AbstractService { } } } + // Log output only for first jobhisotry in pendings to restrict + // the total number of logs. + if (inIntermediateCount > 0) { + LOG.warn("Waiting to remove IN_INTERMEDIATE state histories " + + "(e.g. " + firstInIntermediateKey + ") from JobListCache " + + "because it is not in done yet. Total count is " + + inIntermediateCount + "."); + } + if (moveFailedCount > 0) { + LOG.warn("Waiting to remove MOVE_FAILED state histories " + + "(e.g. " + firstMoveFailedKey + ") from JobListCache " + + "because it is not in done yet. Total count is " + + moveFailedCount + "."); + } } return old; } diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0fe6bb175bb..9f566cc8d6f 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -27,6 +27,8 @@ Release 2.6.3 - UNRELEASED IMPROVEMENTS + MAPREDUCE-6436. JobHistory cache issue. (Kai Sasaki via zxu) + OPTIMIZATIONS BUG FIXES