From c4febc7d0216feec32c84555eaf14ea5163db3b2 Mon Sep 17 00:00:00 2001 From: Zhihai Xu Date: Tue, 15 Dec 2015 00:58:23 -0800 Subject: [PATCH] MAPREDUCE-6436. JobHistory cache issue. Contributed by Kai Sasaki (cherry picked from commit 5b7078d06921893200163a3d29c8901c3c0107cb) --- hadoop-mapreduce-project/CHANGES.txt | 2 + .../mapreduce/v2/hs/HistoryFileManager.java | 37 +++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 710875799dd..2a4e4a1d143 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -662,6 +662,8 @@ Release 2.6.4 - UNRELEASED IMPROVEMENTS + MAPREDUCE-6436. JobHistory cache issue. (Kai Sasaki via zxu) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java index b539072f2eb..01fb9d50b32 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java @@ -219,13 +219,21 @@ public HistoryFileInfo addIfAbsent(HistoryFileInfo fileInfo) { // keeping the cache size exactly at the maximum. Iterator keys = cache.navigableKeySet().iterator(); long cutoff = System.currentTimeMillis() - maxAge; + + // MAPREDUCE-6436: In order to reduce the number of logs written + // in case of a lot of move pending histories. + JobId firstInIntermediateKey = null; + int inIntermediateCount = 0; + JobId firstMoveFailedKey = null; + int moveFailedCount = 0; + while(cache.size() > maxSize && keys.hasNext()) { JobId key = keys.next(); HistoryFileInfo firstValue = cache.get(key); if(firstValue != null) { synchronized(firstValue) { if (firstValue.isMovePending()) { - if(firstValue.didMoveFail() && + if(firstValue.didMoveFail() && firstValue.jobIndexInfo.getFinishTime() <= cutoff) { cache.remove(key); //Now lets try to delete it @@ -236,8 +244,17 @@ public HistoryFileInfo addIfAbsent(HistoryFileInfo fileInfo) { " that could not be moved to done.", e); } } else { - LOG.warn("Waiting to remove " + key - + " from JobListCache because it is not in done yet."); + if (firstValue.didMoveFail()) { + if (moveFailedCount == 0) { + firstMoveFailedKey = key; + } + moveFailedCount += 1; + } else { + if (inIntermediateCount == 0) { + firstInIntermediateKey = key; + } + inIntermediateCount += 1; + } } } else { cache.remove(key); @@ -245,6 +262,20 @@ public HistoryFileInfo addIfAbsent(HistoryFileInfo fileInfo) { } } } + // Log output only for first jobhisotry in pendings to restrict + // the total number of logs. + if (inIntermediateCount > 0) { + LOG.warn("Waiting to remove IN_INTERMEDIATE state histories " + + "(e.g. " + firstInIntermediateKey + ") from JobListCache " + + "because it is not in done yet. Total count is " + + inIntermediateCount + "."); + } + if (moveFailedCount > 0) { + LOG.warn("Waiting to remove MOVE_FAILED state histories " + + "(e.g. " + firstMoveFailedKey + ") from JobListCache " + + "because it is not in done yet. Total count is " + + moveFailedCount + "."); + } } return old; }