MAPREDUCE-6436. JobHistory cache issue. Contributed by Kai Sasaki

(cherry picked from commit 5b7078d069)

Conflicts:
	hadoop-yarn-project/CHANGES.txt
This commit is contained in:
Zhihai Xu 2015-12-15 00:58:23 -08:00
parent 1507d30bc1
commit 35a5b8929e
2 changed files with 36 additions and 3 deletions

View File

@ -219,13 +219,21 @@ public class HistoryFileManager extends AbstractService {
// keeping the cache size exactly at the maximum.
Iterator<JobId> keys = cache.navigableKeySet().iterator();
long cutoff = System.currentTimeMillis() - maxAge;
// MAPREDUCE-6436: Remember only the first key and a count for each
// pending state, to reduce the number of log lines written when many
// histories are move pending.
JobId firstInIntermediateKey = null;
int inIntermediateCount = 0;
JobId firstMoveFailedKey = null;
int moveFailedCount = 0;
while(cache.size() > maxSize && keys.hasNext()) {
JobId key = keys.next();
HistoryFileInfo firstValue = cache.get(key);
if(firstValue != null) {
synchronized(firstValue) {
if (firstValue.isMovePending()) {
if(firstValue.didMoveFail() &&
if(firstValue.didMoveFail() &&
firstValue.jobIndexInfo.getFinishTime() <= cutoff) {
cache.remove(key);
//Now lets try to delete it
@ -236,8 +244,17 @@ public class HistoryFileManager extends AbstractService {
" that could not be moved to done.", e);
}
} else {
LOG.warn("Waiting to remove " + key
+ " from JobListCache because it is not in done yet.");
if (firstValue.didMoveFail()) {
if (moveFailedCount == 0) {
firstMoveFailedKey = key;
}
moveFailedCount += 1;
} else {
if (inIntermediateCount == 0) {
firstInIntermediateKey = key;
}
inIntermediateCount += 1;
}
}
} else {
cache.remove(key);
@ -245,6 +262,20 @@ public class HistoryFileManager extends AbstractService {
}
}
}
// Log only the first job history of each pending state, together with
// the total count, to restrict the total number of log lines.
if (inIntermediateCount > 0) {
LOG.warn("Waiting to remove IN_INTERMEDIATE state histories " +
"(e.g. " + firstInIntermediateKey + ") from JobListCache " +
"because it is not in done yet. Total count is " +
inIntermediateCount + ".");
}
if (moveFailedCount > 0) {
LOG.warn("Waiting to remove MOVE_FAILED state histories " +
"(e.g. " + firstMoveFailedKey + ") from JobListCache " +
"because it is not in done yet. Total count is " +
moveFailedCount + ".");
}
}
return old;
}

View File

@ -48,6 +48,8 @@ Release 2.7.3 - UNRELEASED
YARN-4439. Clarify NMContainerStatus#toString method. (Jian He via xgong)
MAPREDUCE-6436. JobHistory cache issue. (Kai Sasaki via zxu)
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES