MAPREDUCE-4680. Job history cleaner should only check timestamps of files in old enough directories (Robert Kanter via Sandy Ryza)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1536558 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a2d81e3017
commit
84cec3c805
|
@ -214,6 +214,9 @@ Release 2.2.1 - UNRELEASED
|
|||
|
||||
OPTIMIZATIONS
|
||||
|
||||
MAPREDUCE-4680. Job history cleaner should only check timestamps of files in
|
||||
old enough directories (Robert Kanter via Sandy Ryza)
|
||||
|
||||
BUG FIXES
|
||||
|
||||
MAPREDUCE-5569. FloatSplitter is not generating correct splits (Nathan
|
||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.jobhistory;
|
|||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Calendar;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
@ -499,4 +500,72 @@ public class JobHistoryUtils {
|
|||
return fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile(
|
||||
histDirPath,jobId, (applicationAttemptId.getAttemptId() - 1)));
|
||||
}
|
||||
|
||||
/**
|
||||
* Looks for the dirs to clean. The folder structure is YYYY/MM/DD/Serial so
|
||||
* we can use that to more efficiently find the directories to clean by
|
||||
* comparing the cutoff timestamp with the timestamp from the folder
|
||||
* structure.
|
||||
*
|
||||
* @param fc done dir FileContext
|
||||
* @param root folder for completed jobs
|
||||
* @param cutoff The cutoff for the max history age
|
||||
* @return The list of directories for cleaning
|
||||
* @throws IOException
|
||||
*/
|
||||
public static List<FileStatus> getHistoryDirsForCleaning(FileContext fc,
|
||||
Path root, long cutoff) throws IOException {
|
||||
List<FileStatus> fsList = new ArrayList<FileStatus>();
|
||||
Calendar cCal = Calendar.getInstance();
|
||||
cCal.setTimeInMillis(cutoff);
|
||||
int cYear = cCal.get(Calendar.YEAR);
|
||||
int cMonth = cCal.get(Calendar.MONTH) + 1;
|
||||
int cDate = cCal.get(Calendar.DATE);
|
||||
|
||||
RemoteIterator<FileStatus> yearDirIt = fc.listStatus(root);
|
||||
while (yearDirIt.hasNext()) {
|
||||
FileStatus yearDir = yearDirIt.next();
|
||||
try {
|
||||
int year = Integer.parseInt(yearDir.getPath().getName());
|
||||
if (year <= cYear) {
|
||||
RemoteIterator<FileStatus> monthDirIt =
|
||||
fc.listStatus(yearDir.getPath());
|
||||
while (monthDirIt.hasNext()) {
|
||||
FileStatus monthDir = monthDirIt.next();
|
||||
try {
|
||||
int month = Integer.parseInt(monthDir.getPath().getName());
|
||||
// If we only checked the month here, then something like 07/2013
|
||||
// would incorrectly not pass when the cutoff is 06/2014
|
||||
if (year < cYear || month <= cMonth) {
|
||||
RemoteIterator<FileStatus> dateDirIt =
|
||||
fc.listStatus(monthDir.getPath());
|
||||
while (dateDirIt.hasNext()) {
|
||||
FileStatus dateDir = dateDirIt.next();
|
||||
try {
|
||||
int date = Integer.parseInt(dateDir.getPath().getName());
|
||||
// If we only checked the date here, then something like
|
||||
// 07/21/2013 would incorrectly not pass when the cutoff is
|
||||
// 08/20/2013 or 07/20/2012
|
||||
if (year < cYear || month < cMonth || date <= cDate) {
|
||||
fsList.addAll(remoteIterToList(
|
||||
fc.listStatus(dateDir.getPath())));
|
||||
}
|
||||
} catch (NumberFormatException nfe) {
|
||||
// the directory didn't fit the format we're looking for so
|
||||
// skip the dir
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (NumberFormatException nfe) {
|
||||
// the directory didn't fit the format we're looking for so skip
|
||||
// the dir
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (NumberFormatException nfe) {
|
||||
// the directory didn't fit the format we're looking for so skip the dir
|
||||
}
|
||||
}
|
||||
return fsList;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.mapreduce.v2.jobhistory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.Calendar;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import org.apache.hadoop.fs.FileContext;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.fs.permission.FsPermission;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestJobHistoryUtils {
|
||||
|
||||
final static String TEST_DIR = new File(System.getProperty("test.build.data"))
|
||||
.getAbsolutePath();
|
||||
|
||||
@Test
|
||||
@SuppressWarnings("unchecked")
|
||||
public void testGetHistoryDirsForCleaning() throws IOException {
|
||||
Path pRoot = new Path(TEST_DIR, "org.apache.hadoop.mapreduce.v2.jobhistory."
|
||||
+ "TestJobHistoryUtils.testGetHistoryDirsForCleaning");
|
||||
FileContext fc = FileContext.getFileContext();
|
||||
Calendar cCal = Calendar.getInstance();
|
||||
int year = 2013;
|
||||
int month = 7;
|
||||
int day = 21;
|
||||
cCal.set(year, month - 1, day, 1, 0);
|
||||
long cutoff = cCal.getTimeInMillis();
|
||||
|
||||
clearDir(fc, pRoot);
|
||||
Path pId00 = createPath(fc, pRoot, year, month, day, "000000");
|
||||
Path pId01 = createPath(fc, pRoot, year, month, day+1, "000001");
|
||||
Path pId02 = createPath(fc, pRoot, year, month, day-1, "000002");
|
||||
Path pId03 = createPath(fc, pRoot, year, month+1, day, "000003");
|
||||
Path pId04 = createPath(fc, pRoot, year, month+1, day+1, "000004");
|
||||
Path pId05 = createPath(fc, pRoot, year, month+1, day-1, "000005");
|
||||
Path pId06 = createPath(fc, pRoot, year, month-1, day, "000006");
|
||||
Path pId07 = createPath(fc, pRoot, year, month-1, day+1, "000007");
|
||||
Path pId08 = createPath(fc, pRoot, year, month-1, day-1, "000008");
|
||||
Path pId09 = createPath(fc, pRoot, year+1, month, day, "000009");
|
||||
Path pId10 = createPath(fc, pRoot, year+1, month, day+1, "000010");
|
||||
Path pId11 = createPath(fc, pRoot, year+1, month, day-1, "000011");
|
||||
Path pId12 = createPath(fc, pRoot, year+1, month+1, day, "000012");
|
||||
Path pId13 = createPath(fc, pRoot, year+1, month+1, day+1, "000013");
|
||||
Path pId14 = createPath(fc, pRoot, year+1, month+1, day-1, "000014");
|
||||
Path pId15 = createPath(fc, pRoot, year+1, month-1, day, "000015");
|
||||
Path pId16 = createPath(fc, pRoot, year+1, month-1, day+1, "000016");
|
||||
Path pId17 = createPath(fc, pRoot, year+1, month-1, day-1, "000017");
|
||||
Path pId18 = createPath(fc, pRoot, year-1, month, day, "000018");
|
||||
Path pId19 = createPath(fc, pRoot, year-1, month, day+1, "000019");
|
||||
Path pId20 = createPath(fc, pRoot, year-1, month, day-1, "000020");
|
||||
Path pId21 = createPath(fc, pRoot, year-1, month+1, day, "000021");
|
||||
Path pId22 = createPath(fc, pRoot, year-1, month+1, day+1, "000022");
|
||||
Path pId23 = createPath(fc, pRoot, year-1, month+1, day-1, "000023");
|
||||
Path pId24 = createPath(fc, pRoot, year-1, month-1, day, "000024");
|
||||
Path pId25 = createPath(fc, pRoot, year-1, month-1, day+1, "000025");
|
||||
Path pId26 = createPath(fc, pRoot, year-1, month-1, day-1, "000026");
|
||||
// non-expected names should be ignored without problems
|
||||
Path pId27 = createPath(fc, pRoot, "foo", "" + month, "" + day, "000027");
|
||||
Path pId28 = createPath(fc, pRoot, "" + year, "foo", "" + day, "000028");
|
||||
Path pId29 = createPath(fc, pRoot, "" + year, "" + month, "foo", "000029");
|
||||
|
||||
List<FileStatus> dirs = JobHistoryUtils
|
||||
.getHistoryDirsForCleaning(fc, pRoot, cutoff);
|
||||
Collections.sort(dirs);
|
||||
Assert.assertEquals(14, dirs.size());
|
||||
Assert.assertEquals(pId26.toUri().getPath(),
|
||||
dirs.get(0).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId24.toUri().getPath(),
|
||||
dirs.get(1).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId25.toUri().getPath(),
|
||||
dirs.get(2).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId20.toUri().getPath(),
|
||||
dirs.get(3).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId18.toUri().getPath(),
|
||||
dirs.get(4).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId19.toUri().getPath(),
|
||||
dirs.get(5).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId23.toUri().getPath(),
|
||||
dirs.get(6).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId21.toUri().getPath(),
|
||||
dirs.get(7).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId22.toUri().getPath(),
|
||||
dirs.get(8).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId08.toUri().getPath(),
|
||||
dirs.get(9).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId06.toUri().getPath(),
|
||||
dirs.get(10).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId07.toUri().getPath(),
|
||||
dirs.get(11).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId02.toUri().getPath(),
|
||||
dirs.get(12).getPath().toUri().getPath());
|
||||
Assert.assertEquals(pId00.toUri().getPath(),
|
||||
dirs.get(13).getPath().toUri().getPath());
|
||||
}
|
||||
|
||||
private void clearDir(FileContext fc, Path p) throws IOException {
|
||||
try {
|
||||
fc.delete(p, true);
|
||||
} catch (FileNotFoundException e) {
|
||||
// ignore
|
||||
}
|
||||
fc.mkdir(p, FsPermission.getDirDefault(), false);
|
||||
}
|
||||
|
||||
private Path createPath(FileContext fc, Path root, int year, int month,
|
||||
int day, String id) throws IOException {
|
||||
Path path = new Path(root, year + Path.SEPARATOR + month + Path.SEPARATOR +
|
||||
day + Path.SEPARATOR + id);
|
||||
fc.mkdir(path, FsPermission.getDirDefault(), true);
|
||||
return path;
|
||||
}
|
||||
|
||||
private Path createPath(FileContext fc, Path root, String year, String month,
|
||||
String day, String id) throws IOException {
|
||||
Path path = new Path(root, year + Path.SEPARATOR + month + Path.SEPARATOR +
|
||||
day + Path.SEPARATOR + id);
|
||||
fc.mkdir(path, FsPermission.getDirDefault(), true);
|
||||
return path;
|
||||
}
|
||||
}
|
|
@ -924,6 +924,11 @@ public class HistoryFileManager extends AbstractService {
|
|||
fileInfo.delete();
|
||||
}
|
||||
|
||||
List<FileStatus> getHistoryDirsForCleaning(long cutoff) throws IOException {
|
||||
return JobHistoryUtils.
|
||||
getHistoryDirsForCleaning(doneDirFc, doneDirPrefixPath, cutoff);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up older history files.
|
||||
*
|
||||
|
@ -932,12 +937,9 @@ public class HistoryFileManager extends AbstractService {
|
|||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
void clean() throws IOException {
|
||||
// TODO this should be replaced by something that knows about the directory
|
||||
// structure and will put less of a load on HDFS.
|
||||
long cutoff = System.currentTimeMillis() - maxHistoryAge;
|
||||
boolean halted = false;
|
||||
// TODO Delete YYYY/MM/DD directories.
|
||||
List<FileStatus> serialDirList = findTimestampedDirectories();
|
||||
List<FileStatus> serialDirList = getHistoryDirsForCleaning(cutoff);
|
||||
// Sort in ascending order. Relies on YYYY/MM/DD/Serial
|
||||
Collections.sort(serialDirList);
|
||||
for (FileStatus serialDir : serialDirList) {
|
||||
|
|
|
@ -37,6 +37,7 @@ import org.apache.hadoop.mapreduce.v2.jobhistory.JHAdminConfig;
|
|||
import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
|
||||
import org.junit.After;
|
||||
import org.junit.Test;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
@ -175,7 +176,8 @@ public class TestJobHistory {
|
|||
doReturn(list2).when(historyManager).scanDirectoryForHistoryFiles(
|
||||
eq(donePathToday), any(FileContext.class));
|
||||
|
||||
doReturn(fileStatusList).when(historyManager).findTimestampedDirectories();
|
||||
doReturn(fileStatusList).when(historyManager)
|
||||
.getHistoryDirsForCleaning(Mockito.anyLong());
|
||||
doReturn(true).when(historyManager).deleteDir(any(FileStatus.class));
|
||||
|
||||
JobListCache jobListCache = mock(JobListCache.class);
|
||||
|
|
Loading…
Reference in New Issue