YARN-2755. NM fails to clean up usercache_DEL_<timestamp> dirs after YARN-661. Contributed by Siqi Li

(cherry picked from commit 73e626ad91)
This commit is contained in:
Jason Lowe 2014-10-30 15:10:27 +00:00
parent d2ba115f06
commit 3e8544c5f2
3 changed files with 66 additions and 29 deletions

View File

@ -771,6 +771,9 @@ Release 2.6.0 - UNRELEASED
YARN-2769. Fixed the problem that timeline domain is not set in distributed shell
AM when using shell_command on Windows. (Varun Vasudev via zjshen)
YARN-2755. NM fails to clean up usercache_DEL_<timestamp> dirs after
YARN-661 (Siqi Li via jlowe)
Release 2.5.1 - 2014-09-05
INCOMPATIBLE CHANGES

View File

@ -1324,7 +1324,7 @@ public class ResourceLocalizationService extends CompositeService
RemoteIterator<FileStatus> userDirStatus = lfs.listStatus(userDirPath);
FileDeletionTask dependentDeletionTask =
del.createFileDeletionTask(null, userDirPath, new Path[] {});
if (userDirStatus != null) {
if (userDirStatus != null && userDirStatus.hasNext()) {
List<FileDeletionTask> deletionTasks = new ArrayList<FileDeletionTask>();
while (userDirStatus.hasNext()) {
FileStatus status = userDirStatus.next();

View File

@ -35,7 +35,9 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
@ -190,34 +192,9 @@ public class TestNodeManagerReboot {
ResourceLocalizationService.NM_PRIVATE_DIR) > 0);
// restart the NodeManager
nm.stop();
nm = new MyNodeManager();
nm.start();
restartNM(MAX_TRIES);
checkNumOfLocalDirs();
numTries = 0;
while ((numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.USERCACHE) > 0
|| numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.FILECACHE) > 0 || numOfLocalDirs(
nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0)
&& numTries < MAX_TRIES) {
try {
Thread.sleep(500);
} catch (InterruptedException ex) {
// Do nothing
}
numTries++;
}
Assert
.assertTrue(
"After NM reboots, all local files should be deleted",
numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.USERCACHE) == 0
&& numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.FILECACHE) == 0
&& numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ResourceLocalizationService.NM_PRIVATE_DIR) == 0);
verify(delService, times(1)).delete(
(String) isNull(),
argThat(new PathInclude(ResourceLocalizationService.NM_PRIVATE_DIR
@ -230,6 +207,50 @@ public class TestNodeManagerReboot {
verify(delService, times(1)).scheduleFileDeletionTask(
argThat(new FileDeletionInclude(null, ContainerLocalizer.USERCACHE
+ "_DEL_", new String[] {})));
// restart the NodeManager again
// this time usercache directory should be empty
restartNM(MAX_TRIES);
checkNumOfLocalDirs();
}
private void restartNM(int maxTries) {
nm.stop();
nm = new MyNodeManager();
nm.start();
int numTries = 0;
while ((numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.USERCACHE) > 0
|| numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.FILECACHE) > 0 || numOfLocalDirs(
nmLocalDir.getAbsolutePath(), ResourceLocalizationService.NM_PRIVATE_DIR) > 0)
&& numTries < maxTries) {
try {
Thread.sleep(500);
} catch (InterruptedException ex) {
// Do nothing
}
numTries++;
}
}
private void checkNumOfLocalDirs() throws IOException {
Assert
.assertTrue(
"After NM reboots, all local files should be deleted",
numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.USERCACHE) == 0
&& numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ContainerLocalizer.FILECACHE) == 0
&& numOfLocalDirs(nmLocalDir.getAbsolutePath(),
ResourceLocalizationService.NM_PRIVATE_DIR) == 0);
Assert
.assertTrue(
"After NM reboots, usercache_DEL_* directory should be deleted",
numOfUsercacheDELDirs(nmLocalDir.getAbsolutePath()) == 0);
}
private int numOfLocalDirs(String localDir, String localSubDir) {
@ -241,6 +262,19 @@ public class TestNodeManagerReboot {
}
}
private int numOfUsercacheDELDirs(String localDir) throws IOException {
int count = 0;
RemoteIterator<FileStatus> fileStatus = localFS.listStatus(new Path(localDir));
while (fileStatus.hasNext()) {
FileStatus status = fileStatus.next();
if (status.getPath().getName().matches(".*" +
ContainerLocalizer.USERCACHE + "_DEL_.*")) {
count++;
}
}
return count;
}
private void createFiles(String dir, String subDir, int numOfFiles) {
for (int i = 0; i < numOfFiles; i++) {
File newFile = new File(dir + "/" + subDir, "file_" + (i + 1));