YARN-90. NodeManager should identify failed disks becoming good again. Contributed by Varun Vasudev
This commit is contained in:
parent
b6f9d5538c
commit
6f2028bd15
|
@ -379,6 +379,9 @@ Release 2.6.0 - UNRELEASED
|
||||||
YARN-2582. Fixed Log CLI and Web UI for showing aggregated logs of LRS. (Xuan
|
YARN-2582. Fixed Log CLI and Web UI for showing aggregated logs of LRS. (Xuan
|
||||||
Gong via zjshen)
|
Gong via zjshen)
|
||||||
|
|
||||||
|
YARN-90. NodeManager should identify failed disks becoming good again
|
||||||
|
(Varun Vasudev via jlowe)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -21,18 +21,23 @@ package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.CopyOnWriteArrayList;
|
import java.util.concurrent.CopyOnWriteArrayList;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.lang.RandomStringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.util.DiskChecker;
|
import org.apache.hadoop.util.DiskChecker;
|
||||||
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Manages a list of local storage directories.
|
* Manages a list of local storage directories.
|
||||||
|
@ -40,9 +45,38 @@ import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
||||||
class DirectoryCollection {
|
class DirectoryCollection {
|
||||||
private static final Log LOG = LogFactory.getLog(DirectoryCollection.class);
|
private static final Log LOG = LogFactory.getLog(DirectoryCollection.class);
|
||||||
|
|
||||||
|
public enum DiskErrorCause {
|
||||||
|
DISK_FULL, OTHER
|
||||||
|
}
|
||||||
|
|
||||||
|
static class DiskErrorInformation {
|
||||||
|
DiskErrorCause cause;
|
||||||
|
String message;
|
||||||
|
|
||||||
|
DiskErrorInformation(DiskErrorCause cause, String message) {
|
||||||
|
this.cause = cause;
|
||||||
|
this.message = message;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a merged list which contains all the elements of l1 and l2
|
||||||
|
* @param l1 the first list to be included
|
||||||
|
* @param l2 the second list to be included
|
||||||
|
* @return a new list containing all the elements of the first and second list
|
||||||
|
*/
|
||||||
|
static List<String> concat(List<String> l1, List<String> l2) {
|
||||||
|
List<String> ret = new ArrayList<String>(l1.size() + l2.size());
|
||||||
|
ret.addAll(l1);
|
||||||
|
ret.addAll(l2);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
// Good local storage directories
|
// Good local storage directories
|
||||||
private List<String> localDirs;
|
private List<String> localDirs;
|
||||||
private List<String> failedDirs;
|
private List<String> errorDirs;
|
||||||
|
private List<String> fullDirs;
|
||||||
|
|
||||||
private int numFailures;
|
private int numFailures;
|
||||||
|
|
||||||
private float diskUtilizationPercentageCutoff;
|
private float diskUtilizationPercentageCutoff;
|
||||||
|
@ -109,7 +143,9 @@ class DirectoryCollection {
|
||||||
float utilizationPercentageCutOff,
|
float utilizationPercentageCutOff,
|
||||||
long utilizationSpaceCutOff) {
|
long utilizationSpaceCutOff) {
|
||||||
localDirs = new CopyOnWriteArrayList<String>(dirs);
|
localDirs = new CopyOnWriteArrayList<String>(dirs);
|
||||||
failedDirs = new CopyOnWriteArrayList<String>();
|
errorDirs = new CopyOnWriteArrayList<String>();
|
||||||
|
fullDirs = new CopyOnWriteArrayList<String>();
|
||||||
|
|
||||||
diskUtilizationPercentageCutoff = utilizationPercentageCutOff;
|
diskUtilizationPercentageCutoff = utilizationPercentageCutOff;
|
||||||
diskUtilizationSpaceCutoff = utilizationSpaceCutOff;
|
diskUtilizationSpaceCutoff = utilizationSpaceCutOff;
|
||||||
diskUtilizationPercentageCutoff =
|
diskUtilizationPercentageCutoff =
|
||||||
|
@ -131,7 +167,16 @@ class DirectoryCollection {
|
||||||
* @return the failed directories
|
* @return the failed directories
|
||||||
*/
|
*/
|
||||||
synchronized List<String> getFailedDirs() {
|
synchronized List<String> getFailedDirs() {
|
||||||
return Collections.unmodifiableList(failedDirs);
|
return Collections.unmodifiableList(
|
||||||
|
DirectoryCollection.concat(errorDirs, fullDirs));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the directories that have used all disk space
|
||||||
|
*/
|
||||||
|
|
||||||
|
synchronized List<String> getFullDirs() {
|
||||||
|
return fullDirs;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -158,7 +203,7 @@ class DirectoryCollection {
|
||||||
LOG.warn("Unable to create directory " + dir + " error " +
|
LOG.warn("Unable to create directory " + dir + " error " +
|
||||||
e.getMessage() + ", removing from the list of valid directories.");
|
e.getMessage() + ", removing from the list of valid directories.");
|
||||||
localDirs.remove(dir);
|
localDirs.remove(dir);
|
||||||
failedDirs.add(dir);
|
errorDirs.add(dir);
|
||||||
numFailures++;
|
numFailures++;
|
||||||
failed = true;
|
failed = true;
|
||||||
}
|
}
|
||||||
|
@ -167,61 +212,147 @@ class DirectoryCollection {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check the health of current set of local directories, updating the list
|
* Check the health of current set of local directories(good and failed),
|
||||||
* of valid directories if necessary.
|
* updating the list of valid directories if necessary.
|
||||||
* @return <em>true</em> if there is a new disk-failure identified in
|
*
|
||||||
* this checking. <em>false</em> otherwise.
|
* @return <em>true</em> if there is a new disk-failure identified in this
|
||||||
|
* checking or a failed directory passes the disk check <em>false</em>
|
||||||
|
* otherwise.
|
||||||
*/
|
*/
|
||||||
synchronized boolean checkDirs() {
|
synchronized boolean checkDirs() {
|
||||||
int oldNumFailures = numFailures;
|
boolean setChanged = false;
|
||||||
HashSet<String> checkFailedDirs = new HashSet<String>();
|
Set<String> preCheckGoodDirs = new HashSet<String>(localDirs);
|
||||||
for (final String dir : localDirs) {
|
Set<String> preCheckFullDirs = new HashSet<String>(fullDirs);
|
||||||
|
Set<String> preCheckOtherErrorDirs = new HashSet<String>(errorDirs);
|
||||||
|
List<String> failedDirs = DirectoryCollection.concat(errorDirs, fullDirs);
|
||||||
|
List<String> allLocalDirs =
|
||||||
|
DirectoryCollection.concat(localDirs, failedDirs);
|
||||||
|
|
||||||
|
Map<String, DiskErrorInformation> dirsFailedCheck = testDirs(allLocalDirs);
|
||||||
|
|
||||||
|
localDirs.clear();
|
||||||
|
errorDirs.clear();
|
||||||
|
fullDirs.clear();
|
||||||
|
|
||||||
|
for (Map.Entry<String, DiskErrorInformation> entry : dirsFailedCheck
|
||||||
|
.entrySet()) {
|
||||||
|
String dir = entry.getKey();
|
||||||
|
DiskErrorInformation errorInformation = entry.getValue();
|
||||||
|
switch (entry.getValue().cause) {
|
||||||
|
case DISK_FULL:
|
||||||
|
fullDirs.add(entry.getKey());
|
||||||
|
break;
|
||||||
|
case OTHER:
|
||||||
|
errorDirs.add(entry.getKey());
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (preCheckGoodDirs.contains(dir)) {
|
||||||
|
LOG.warn("Directory " + dir + " error, " + errorInformation.message
|
||||||
|
+ ", removing from list of valid directories");
|
||||||
|
setChanged = true;
|
||||||
|
numFailures++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (String dir : allLocalDirs) {
|
||||||
|
if (!dirsFailedCheck.containsKey(dir)) {
|
||||||
|
localDirs.add(dir);
|
||||||
|
if (preCheckFullDirs.contains(dir)
|
||||||
|
|| preCheckOtherErrorDirs.contains(dir)) {
|
||||||
|
setChanged = true;
|
||||||
|
LOG.info("Directory " + dir
|
||||||
|
+ " passed disk check, adding to list of valid directories.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Set<String> postCheckFullDirs = new HashSet<String>(fullDirs);
|
||||||
|
Set<String> postCheckOtherDirs = new HashSet<String>(errorDirs);
|
||||||
|
for (String dir : preCheckFullDirs) {
|
||||||
|
if (postCheckOtherDirs.contains(dir)) {
|
||||||
|
LOG.warn("Directory " + dir + " error "
|
||||||
|
+ dirsFailedCheck.get(dir).message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String dir : preCheckOtherErrorDirs) {
|
||||||
|
if (postCheckFullDirs.contains(dir)) {
|
||||||
|
LOG.warn("Directory " + dir + " error "
|
||||||
|
+ dirsFailedCheck.get(dir).message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return setChanged;
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, DiskErrorInformation> testDirs(List<String> dirs) {
|
||||||
|
HashMap<String, DiskErrorInformation> ret =
|
||||||
|
new HashMap<String, DiskErrorInformation>();
|
||||||
|
for (final String dir : dirs) {
|
||||||
|
String msg;
|
||||||
try {
|
try {
|
||||||
File testDir = new File(dir);
|
File testDir = new File(dir);
|
||||||
DiskChecker.checkDir(testDir);
|
DiskChecker.checkDir(testDir);
|
||||||
if (isDiskUsageUnderPercentageLimit(testDir)) {
|
if (isDiskUsageOverPercentageLimit(testDir)) {
|
||||||
LOG.warn("Directory " + dir
|
msg =
|
||||||
+ " error, used space above threshold of "
|
"used space above threshold of "
|
||||||
+ diskUtilizationPercentageCutoff
|
+ diskUtilizationPercentageCutoff
|
||||||
+ "%, removing from the list of valid directories.");
|
+ "%";
|
||||||
checkFailedDirs.add(dir);
|
ret.put(dir,
|
||||||
} else if (isDiskFreeSpaceWithinLimit(testDir)) {
|
new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
|
||||||
LOG.warn("Directory " + dir + " error, free space below limit of "
|
continue;
|
||||||
+ diskUtilizationSpaceCutoff
|
} else if (isDiskFreeSpaceUnderLimit(testDir)) {
|
||||||
+ "MB, removing from the list of valid directories.");
|
msg =
|
||||||
checkFailedDirs.add(dir);
|
"free space below limit of " + diskUtilizationSpaceCutoff
|
||||||
}
|
+ "MB";
|
||||||
} catch (DiskErrorException de) {
|
ret.put(dir,
|
||||||
LOG.warn("Directory " + dir + " error " + de.getMessage()
|
new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg));
|
||||||
+ ", removing from the list of valid directories.");
|
continue;
|
||||||
checkFailedDirs.add(dir);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (String dir : checkFailedDirs) {
|
|
||||||
localDirs.remove(dir);
|
|
||||||
failedDirs.add(dir);
|
|
||||||
numFailures++;
|
|
||||||
}
|
|
||||||
return numFailures > oldNumFailures;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isDiskUsageUnderPercentageLimit(File dir) {
|
// create a random dir to make sure fs isn't in read-only mode
|
||||||
|
verifyDirUsingMkdir(testDir);
|
||||||
|
} catch (IOException ie) {
|
||||||
|
ret.put(dir,
|
||||||
|
new DiskErrorInformation(DiskErrorCause.OTHER, ie.getMessage()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function to test whether a dir is working correctly by actually creating a
|
||||||
|
* random directory.
|
||||||
|
*
|
||||||
|
* @param dir
|
||||||
|
* the dir to test
|
||||||
|
*/
|
||||||
|
private void verifyDirUsingMkdir(File dir) throws IOException {
|
||||||
|
|
||||||
|
String randomDirName = RandomStringUtils.randomAlphanumeric(5);
|
||||||
|
File target = new File(dir, randomDirName);
|
||||||
|
int i = 0;
|
||||||
|
while (target.exists()) {
|
||||||
|
|
||||||
|
randomDirName = RandomStringUtils.randomAlphanumeric(5) + i;
|
||||||
|
target = new File(dir, randomDirName);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
DiskChecker.checkDir(target);
|
||||||
|
} finally {
|
||||||
|
FileUtils.deleteQuietly(target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isDiskUsageOverPercentageLimit(File dir) {
|
||||||
float freePercentage =
|
float freePercentage =
|
||||||
100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
|
100 * (dir.getUsableSpace() / (float) dir.getTotalSpace());
|
||||||
float usedPercentage = 100.0F - freePercentage;
|
float usedPercentage = 100.0F - freePercentage;
|
||||||
if (usedPercentage > diskUtilizationPercentageCutoff
|
return (usedPercentage > diskUtilizationPercentageCutoff
|
||||||
|| usedPercentage >= 100.0F) {
|
|| usedPercentage >= 100.0F);
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isDiskFreeSpaceWithinLimit(File dir) {
|
private boolean isDiskFreeSpaceUnderLimit(File dir) {
|
||||||
long freeSpace = dir.getUsableSpace() / (1024 * 1024);
|
long freeSpace = dir.getUsableSpace() / (1024 * 1024);
|
||||||
if (freeSpace < this.diskUtilizationSpaceCutoff) {
|
return freeSpace < this.diskUtilizationSpaceCutoff;
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void createDir(FileContext localFs, Path dir, FsPermission perm)
|
private void createDir(FileContext localFs, Path dir, FsPermission perm)
|
||||||
|
|
|
@ -21,7 +21,9 @@ package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.Timer;
|
import java.util.Timer;
|
||||||
import java.util.TimerTask;
|
import java.util.TimerTask;
|
||||||
|
|
||||||
|
@ -150,7 +152,7 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
boolean createSucceeded = localDirs.createNonExistentDirs(localFs, perm);
|
boolean createSucceeded = localDirs.createNonExistentDirs(localFs, perm);
|
||||||
createSucceeded &= logDirs.createNonExistentDirs(localFs, perm);
|
createSucceeded &= logDirs.createNonExistentDirs(localFs, perm);
|
||||||
if (!createSucceeded) {
|
if (!createSucceeded) {
|
||||||
updateDirsAfterFailure();
|
updateDirsAfterTest();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the disk health immediately to weed out bad directories
|
// Check the disk health immediately to weed out bad directories
|
||||||
|
@ -197,9 +199,52 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* @return the local directories which have no disk space
|
||||||
|
*/
|
||||||
|
public List<String> getDiskFullLocalDirs() {
|
||||||
|
return localDirs.getFullDirs();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the log directories that have no disk space
|
||||||
|
*/
|
||||||
|
public List<String> getDiskFullLogDirs() {
|
||||||
|
return logDirs.getFullDirs();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function to get the local dirs which should be considered when cleaning up
|
||||||
|
* resources. Contains the good local dirs and the local dirs that have reached
|
||||||
|
* the disk space limit
|
||||||
|
*
|
||||||
|
* @return the local dirs which should be considered for cleaning up
|
||||||
|
*/
|
||||||
|
public List<String> getLocalDirsForCleanup() {
|
||||||
|
return DirectoryCollection.concat(localDirs.getGoodDirs(),
|
||||||
|
localDirs.getFullDirs());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function to get the log dirs which should be considered when cleaning up
|
||||||
|
* resources. Contains the good log dirs and the log dirs that have reached
|
||||||
|
* the disk space limit
|
||||||
|
*
|
||||||
|
* @return the log dirs which should be considered for cleaning up
|
||||||
|
*/
|
||||||
|
public List<String> getLogDirsForCleanup() {
|
||||||
|
return DirectoryCollection.concat(logDirs.getGoodDirs(),
|
||||||
|
logDirs.getFullDirs());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function to generate a report on the state of the disks.
|
||||||
|
*
|
||||||
|
* @param listGoodDirs
|
||||||
|
* flag to determine whether the report should report the state of
|
||||||
|
* good dirs or failed dirs
|
||||||
* @return the health report of nm-local-dirs and nm-log-dirs
|
* @return the health report of nm-local-dirs and nm-log-dirs
|
||||||
*/
|
*/
|
||||||
public String getDisksHealthReport() {
|
public String getDisksHealthReport(boolean listGoodDirs) {
|
||||||
if (!isDiskHealthCheckerEnabled) {
|
if (!isDiskHealthCheckerEnabled) {
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
|
@ -207,20 +252,31 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
StringBuilder report = new StringBuilder();
|
StringBuilder report = new StringBuilder();
|
||||||
List<String> failedLocalDirsList = localDirs.getFailedDirs();
|
List<String> failedLocalDirsList = localDirs.getFailedDirs();
|
||||||
List<String> failedLogDirsList = logDirs.getFailedDirs();
|
List<String> failedLogDirsList = logDirs.getFailedDirs();
|
||||||
int numLocalDirs = localDirs.getGoodDirs().size()
|
List<String> goodLocalDirsList = localDirs.getGoodDirs();
|
||||||
+ failedLocalDirsList.size();
|
List<String> goodLogDirsList = logDirs.getGoodDirs();
|
||||||
int numLogDirs = logDirs.getGoodDirs().size() + failedLogDirsList.size();
|
int numLocalDirs = goodLocalDirsList.size() + failedLocalDirsList.size();
|
||||||
|
int numLogDirs = goodLogDirsList.size() + failedLogDirsList.size();
|
||||||
|
if (!listGoodDirs) {
|
||||||
if (!failedLocalDirsList.isEmpty()) {
|
if (!failedLocalDirsList.isEmpty()) {
|
||||||
report.append(failedLocalDirsList.size() + "/" + numLocalDirs
|
report.append(failedLocalDirsList.size() + "/" + numLocalDirs
|
||||||
+ " local-dirs turned bad: "
|
+ " local-dirs are bad: "
|
||||||
+ StringUtils.join(",", failedLocalDirsList) + ";");
|
+ StringUtils.join(",", failedLocalDirsList) + "; ");
|
||||||
}
|
}
|
||||||
if (!failedLogDirsList.isEmpty()) {
|
if (!failedLogDirsList.isEmpty()) {
|
||||||
report.append(failedLogDirsList.size() + "/" + numLogDirs
|
report.append(failedLogDirsList.size() + "/" + numLogDirs
|
||||||
+ " log-dirs turned bad: "
|
+ " log-dirs are bad: " + StringUtils.join(",", failedLogDirsList));
|
||||||
+ StringUtils.join(",", failedLogDirsList));
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
report.append(goodLocalDirsList.size() + "/" + numLocalDirs
|
||||||
|
+ " local-dirs are good: " + StringUtils.join(",", goodLocalDirsList)
|
||||||
|
+ "; ");
|
||||||
|
report.append(goodLogDirsList.size() + "/" + numLogDirs
|
||||||
|
+ " log-dirs are good: " + StringUtils.join(",", goodLogDirsList));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return report.toString();
|
return report.toString();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -262,8 +318,8 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
* Set good local dirs and good log dirs in the configuration so that the
|
* Set good local dirs and good log dirs in the configuration so that the
|
||||||
* LocalDirAllocator objects will use this updated configuration only.
|
* LocalDirAllocator objects will use this updated configuration only.
|
||||||
*/
|
*/
|
||||||
private void updateDirsAfterFailure() {
|
private void updateDirsAfterTest() {
|
||||||
LOG.info("Disk(s) failed. " + getDisksHealthReport());
|
|
||||||
Configuration conf = getConfig();
|
Configuration conf = getConfig();
|
||||||
List<String> localDirs = getLocalDirs();
|
List<String> localDirs = getLocalDirs();
|
||||||
conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS,
|
conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS,
|
||||||
|
@ -273,25 +329,93 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
logDirs.toArray(new String[logDirs.size()]));
|
logDirs.toArray(new String[logDirs.size()]));
|
||||||
if (!areDisksHealthy()) {
|
if (!areDisksHealthy()) {
|
||||||
// Just log.
|
// Just log.
|
||||||
LOG.error("Most of the disks failed. " + getDisksHealthReport());
|
LOG.error("Most of the disks failed. " + getDisksHealthReport(false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void logDiskStatus(boolean newDiskFailure, boolean diskTurnedGood) {
|
||||||
|
if (newDiskFailure) {
|
||||||
|
String report = getDisksHealthReport(false);
|
||||||
|
LOG.info("Disk(s) failed: " + report);
|
||||||
|
}
|
||||||
|
if (diskTurnedGood) {
|
||||||
|
String report = getDisksHealthReport(true);
|
||||||
|
LOG.info("Disk(s) turned good: " + report);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private void checkDirs() {
|
private void checkDirs() {
|
||||||
boolean newFailure = false;
|
boolean disksStatusChange = false;
|
||||||
|
Set<String> failedLocalDirsPreCheck =
|
||||||
|
new HashSet<String>(localDirs.getFailedDirs());
|
||||||
|
Set<String> failedLogDirsPreCheck =
|
||||||
|
new HashSet<String>(logDirs.getFailedDirs());
|
||||||
|
|
||||||
if (localDirs.checkDirs()) {
|
if (localDirs.checkDirs()) {
|
||||||
newFailure = true;
|
disksStatusChange = true;
|
||||||
}
|
}
|
||||||
if (logDirs.checkDirs()) {
|
if (logDirs.checkDirs()) {
|
||||||
newFailure = true;
|
disksStatusChange = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (newFailure) {
|
Set<String> failedLocalDirsPostCheck =
|
||||||
updateDirsAfterFailure();
|
new HashSet<String>(localDirs.getFailedDirs());
|
||||||
|
Set<String> failedLogDirsPostCheck =
|
||||||
|
new HashSet<String>(logDirs.getFailedDirs());
|
||||||
|
|
||||||
|
boolean disksFailed = false;
|
||||||
|
boolean disksTurnedGood = false;
|
||||||
|
|
||||||
|
disksFailed =
|
||||||
|
disksTurnedBad(failedLocalDirsPreCheck, failedLocalDirsPostCheck);
|
||||||
|
disksTurnedGood =
|
||||||
|
disksTurnedGood(failedLocalDirsPreCheck, failedLocalDirsPostCheck);
|
||||||
|
|
||||||
|
// skip check if we have new failed or good local dirs since we're going to
|
||||||
|
// log anyway
|
||||||
|
if (!disksFailed) {
|
||||||
|
disksFailed =
|
||||||
|
disksTurnedBad(failedLogDirsPreCheck, failedLogDirsPostCheck);
|
||||||
}
|
}
|
||||||
|
if (!disksTurnedGood) {
|
||||||
|
disksTurnedGood =
|
||||||
|
disksTurnedGood(failedLogDirsPreCheck, failedLogDirsPostCheck);
|
||||||
|
}
|
||||||
|
|
||||||
|
logDiskStatus(disksFailed, disksTurnedGood);
|
||||||
|
|
||||||
|
if (disksStatusChange) {
|
||||||
|
updateDirsAfterTest();
|
||||||
|
}
|
||||||
|
|
||||||
lastDisksCheckTime = System.currentTimeMillis();
|
lastDisksCheckTime = System.currentTimeMillis();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean disksTurnedBad(Set<String> preCheckFailedDirs,
|
||||||
|
Set<String> postCheckDirs) {
|
||||||
|
boolean disksFailed = false;
|
||||||
|
for (String dir : postCheckDirs) {
|
||||||
|
if (!preCheckFailedDirs.contains(dir)) {
|
||||||
|
disksFailed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return disksFailed;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean disksTurnedGood(Set<String> preCheckDirs,
|
||||||
|
Set<String> postCheckDirs) {
|
||||||
|
boolean disksTurnedGood = false;
|
||||||
|
for (String dir : preCheckDirs) {
|
||||||
|
if (!postCheckDirs.contains(dir)) {
|
||||||
|
disksTurnedGood = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return disksTurnedGood;
|
||||||
|
}
|
||||||
|
|
||||||
public Path getLocalPathForWrite(String pathStr) throws IOException {
|
public Path getLocalPathForWrite(String pathStr) throws IOException {
|
||||||
return localDirsAllocator.getLocalPathForWrite(pathStr, getConfig());
|
return localDirsAllocator.getLocalPathForWrite(pathStr, getConfig());
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,9 +55,9 @@ public class NodeHealthCheckerService extends CompositeService {
|
||||||
String scriptReport = (nodeHealthScriptRunner == null) ? ""
|
String scriptReport = (nodeHealthScriptRunner == null) ? ""
|
||||||
: nodeHealthScriptRunner.getHealthReport();
|
: nodeHealthScriptRunner.getHealthReport();
|
||||||
if (scriptReport.equals("")) {
|
if (scriptReport.equals("")) {
|
||||||
return dirsHandler.getDisksHealthReport();
|
return dirsHandler.getDisksHealthReport(false);
|
||||||
} else {
|
} else {
|
||||||
return scriptReport.concat(SEPARATOR + dirsHandler.getDisksHealthReport());
|
return scriptReport.concat(SEPARATOR + dirsHandler.getDisksHealthReport(false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -240,7 +240,7 @@ public class ContainerLaunch implements Callable<Integer> {
|
||||||
if (!dirsHandler.areDisksHealthy()) {
|
if (!dirsHandler.areDisksHealthy()) {
|
||||||
ret = ContainerExitStatus.DISKS_FAILED;
|
ret = ContainerExitStatus.DISKS_FAILED;
|
||||||
throw new IOException("Most of the disks failed. "
|
throw new IOException("Most of the disks failed. "
|
||||||
+ dirsHandler.getDisksHealthReport());
|
+ dirsHandler.getDisksHealthReport(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
|
@ -55,11 +55,13 @@ import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||||
|
import org.apache.hadoop.fs.FileAlreadyExistsException;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.RemoteIterator;
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.ipc.Server;
|
import org.apache.hadoop.ipc.Server;
|
||||||
import org.apache.hadoop.security.Credentials;
|
import org.apache.hadoop.security.Credentials;
|
||||||
|
@ -171,6 +173,8 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
private final ConcurrentMap<String,LocalResourcesTracker> appRsrc =
|
private final ConcurrentMap<String,LocalResourcesTracker> appRsrc =
|
||||||
new ConcurrentHashMap<String,LocalResourcesTracker>();
|
new ConcurrentHashMap<String,LocalResourcesTracker>();
|
||||||
|
|
||||||
|
FileContext lfs;
|
||||||
|
|
||||||
public ResourceLocalizationService(Dispatcher dispatcher,
|
public ResourceLocalizationService(Dispatcher dispatcher,
|
||||||
ContainerExecutor exec, DeletionService delService,
|
ContainerExecutor exec, DeletionService delService,
|
||||||
LocalDirsHandlerService dirsHandler, NMStateStoreService stateStore) {
|
LocalDirsHandlerService dirsHandler, NMStateStoreService stateStore) {
|
||||||
|
@ -219,32 +223,17 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
this.recordFactory = RecordFactoryProvider.getRecordFactory(conf);
|
this.recordFactory = RecordFactoryProvider.getRecordFactory(conf);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
FileContext lfs = getLocalFileContext(conf);
|
lfs = getLocalFileContext(conf);
|
||||||
lfs.setUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK));
|
lfs.setUMask(new FsPermission((short) FsPermission.DEFAULT_UMASK));
|
||||||
|
|
||||||
if (!stateStore.canRecover() || stateStore.isNewlyCreated()) {
|
if (!stateStore.canRecover()|| stateStore.isNewlyCreated()) {
|
||||||
cleanUpLocalDir(lfs,delService);
|
cleanUpLocalDirs(lfs, delService);
|
||||||
|
initializeLocalDirs(lfs);
|
||||||
|
initializeLogDirs(lfs);
|
||||||
}
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
List<String> localDirs = dirsHandler.getLocalDirs();
|
throw new YarnRuntimeException(
|
||||||
for (String localDir : localDirs) {
|
"Failed to initialize LocalizationService", e);
|
||||||
// $local/usercache
|
|
||||||
Path userDir = new Path(localDir, ContainerLocalizer.USERCACHE);
|
|
||||||
lfs.mkdir(userDir, null, true);
|
|
||||||
// $local/filecache
|
|
||||||
Path fileDir = new Path(localDir, ContainerLocalizer.FILECACHE);
|
|
||||||
lfs.mkdir(fileDir, null, true);
|
|
||||||
// $local/nmPrivate
|
|
||||||
Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
|
|
||||||
lfs.mkdir(sysDir, NM_PRIVATE_PERM, true);
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> logDirs = dirsHandler.getLogDirs();
|
|
||||||
for (String logDir : logDirs) {
|
|
||||||
lfs.mkdir(new Path(logDir), null, true);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new YarnRuntimeException("Failed to initialize LocalizationService", e);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
cacheTargetSize =
|
cacheTargetSize =
|
||||||
|
@ -497,22 +486,27 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
String containerIDStr = c.toString();
|
String containerIDStr = c.toString();
|
||||||
String appIDStr = ConverterUtils.toString(
|
String appIDStr = ConverterUtils.toString(
|
||||||
c.getContainerId().getApplicationAttemptId().getApplicationId());
|
c.getContainerId().getApplicationAttemptId().getApplicationId());
|
||||||
for (String localDir : dirsHandler.getLocalDirs()) {
|
|
||||||
|
|
||||||
|
// Try deleting from good local dirs and full local dirs because a dir might
|
||||||
|
// have gone bad while the app was running(disk full). In addition
|
||||||
|
// a dir might have become good while the app was running.
|
||||||
|
// Check if the container dir exists and if it does, try to delete it
|
||||||
|
|
||||||
|
for (String localDir : dirsHandler.getLocalDirsForCleanup()) {
|
||||||
// Delete the user-owned container-dir
|
// Delete the user-owned container-dir
|
||||||
Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
|
Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
|
||||||
Path userdir = new Path(usersdir, userName);
|
Path userdir = new Path(usersdir, userName);
|
||||||
Path allAppsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
|
Path allAppsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
|
||||||
Path appDir = new Path(allAppsdir, appIDStr);
|
Path appDir = new Path(allAppsdir, appIDStr);
|
||||||
Path containerDir = new Path(appDir, containerIDStr);
|
Path containerDir = new Path(appDir, containerIDStr);
|
||||||
delService.delete(userName, containerDir, new Path[] {});
|
submitDirForDeletion(userName, containerDir);
|
||||||
|
|
||||||
// Delete the nmPrivate container-dir
|
// Delete the nmPrivate container-dir
|
||||||
|
|
||||||
Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
|
Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
|
||||||
Path appSysDir = new Path(sysDir, appIDStr);
|
Path appSysDir = new Path(sysDir, appIDStr);
|
||||||
Path containerSysDir = new Path(appSysDir, containerIDStr);
|
Path containerSysDir = new Path(appSysDir, containerIDStr);
|
||||||
delService.delete(null, containerSysDir, new Path[] {});
|
submitDirForDeletion(null, containerSysDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
dispatcher.getEventHandler().handle(
|
dispatcher.getEventHandler().handle(
|
||||||
|
@ -520,6 +514,18 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP));
|
ContainerEventType.CONTAINER_RESOURCES_CLEANEDUP));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void submitDirForDeletion(String userName, Path dir) {
|
||||||
|
try {
|
||||||
|
lfs.getFileStatus(dir);
|
||||||
|
delService.delete(userName, dir, new Path[] {});
|
||||||
|
} catch (UnsupportedFileSystemException ue) {
|
||||||
|
LOG.warn("Local dir " + dir + " is an unsupported filesystem", ue);
|
||||||
|
} catch (IOException ie) {
|
||||||
|
// ignore
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@SuppressWarnings({"unchecked"})
|
@SuppressWarnings({"unchecked"})
|
||||||
private void handleDestroyApplicationResources(Application application) {
|
private void handleDestroyApplicationResources(Application application) {
|
||||||
|
@ -545,19 +551,22 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
}
|
}
|
||||||
|
|
||||||
// Delete the application directories
|
// Delete the application directories
|
||||||
for (String localDir : dirsHandler.getLocalDirs()) {
|
userName = application.getUser();
|
||||||
|
appIDStr = application.toString();
|
||||||
|
|
||||||
|
for (String localDir : dirsHandler.getLocalDirsForCleanup()) {
|
||||||
|
|
||||||
// Delete the user-owned app-dir
|
// Delete the user-owned app-dir
|
||||||
Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
|
Path usersdir = new Path(localDir, ContainerLocalizer.USERCACHE);
|
||||||
Path userdir = new Path(usersdir, userName);
|
Path userdir = new Path(usersdir, userName);
|
||||||
Path allAppsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
|
Path allAppsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
|
||||||
Path appDir = new Path(allAppsdir, appIDStr);
|
Path appDir = new Path(allAppsdir, appIDStr);
|
||||||
delService.delete(userName, appDir, new Path[] {});
|
submitDirForDeletion(userName, appDir);
|
||||||
|
|
||||||
// Delete the nmPrivate app-dir
|
// Delete the nmPrivate app-dir
|
||||||
Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
|
Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
|
||||||
Path appSysDir = new Path(sysDir, appIDStr);
|
Path appSysDir = new Path(sysDir, appIDStr);
|
||||||
delService.delete(null, appSysDir, new Path[] {});
|
submitDirForDeletion(null, appSysDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: decrement reference counts of all resources associated with this
|
// TODO: decrement reference counts of all resources associated with this
|
||||||
|
@ -868,7 +877,7 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
/**
|
/**
|
||||||
* Find next resource to be given to a spawned localizer.
|
* Find next resource to be given to a spawned localizer.
|
||||||
*
|
*
|
||||||
* @return
|
* @return the next resource to be localized
|
||||||
*/
|
*/
|
||||||
private LocalResource findNextResource() {
|
private LocalResource findNextResource() {
|
||||||
synchronized (pending) {
|
synchronized (pending) {
|
||||||
|
@ -1071,8 +1080,8 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
// 1) write credentials to private dir
|
// 1) write credentials to private dir
|
||||||
writeCredentials(nmPrivateCTokensPath);
|
writeCredentials(nmPrivateCTokensPath);
|
||||||
// 2) exec initApplication and wait
|
// 2) exec initApplication and wait
|
||||||
List<String> localDirs = dirsHandler.getLocalDirs();
|
List<String> localDirs = getInitializedLocalDirs();
|
||||||
List<String> logDirs = dirsHandler.getLogDirs();
|
List<String> logDirs = getInitializedLogDirs();
|
||||||
if (dirsHandler.areDisksHealthy()) {
|
if (dirsHandler.areDisksHealthy()) {
|
||||||
exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
|
exec.startLocalizer(nmPrivateCTokensPath, localizationServerAddress,
|
||||||
context.getUser(),
|
context.getUser(),
|
||||||
|
@ -1082,7 +1091,7 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
localizerId, localDirs, logDirs);
|
localizerId, localDirs, logDirs);
|
||||||
} else {
|
} else {
|
||||||
throw new IOException("All disks failed. "
|
throw new IOException("All disks failed. "
|
||||||
+ dirsHandler.getDisksHealthReport());
|
+ dirsHandler.getDisksHealthReport(false));
|
||||||
}
|
}
|
||||||
// TODO handle ExitCodeException separately?
|
// TODO handle ExitCodeException separately?
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -1151,9 +1160,81 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void cleanUpLocalDir(FileContext lfs, DeletionService del) {
|
private void initializeLocalDirs(FileContext lfs) {
|
||||||
long currentTimeStamp = System.currentTimeMillis();
|
List<String> localDirs = dirsHandler.getLocalDirs();
|
||||||
|
for (String localDir : localDirs) {
|
||||||
|
initializeLocalDir(lfs, localDir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeLocalDir(FileContext lfs, String localDir) {
|
||||||
|
|
||||||
|
Map<Path, FsPermission> pathPermissionMap = getLocalDirsPathPermissionsMap(localDir);
|
||||||
|
for (Map.Entry<Path, FsPermission> entry : pathPermissionMap.entrySet()) {
|
||||||
|
FileStatus status;
|
||||||
|
try {
|
||||||
|
status = lfs.getFileStatus(entry.getKey());
|
||||||
|
}
|
||||||
|
catch(FileNotFoundException fs) {
|
||||||
|
status = null;
|
||||||
|
}
|
||||||
|
catch(IOException ie) {
|
||||||
|
String msg = "Could not get file status for local dir " + entry.getKey();
|
||||||
|
LOG.warn(msg, ie);
|
||||||
|
throw new YarnRuntimeException(msg, ie);
|
||||||
|
}
|
||||||
|
if(status == null) {
|
||||||
|
try {
|
||||||
|
lfs.mkdir(entry.getKey(), entry.getValue(), true);
|
||||||
|
status = lfs.getFileStatus(entry.getKey());
|
||||||
|
} catch (IOException e) {
|
||||||
|
String msg = "Could not initialize local dir " + entry.getKey();
|
||||||
|
LOG.warn(msg, e);
|
||||||
|
throw new YarnRuntimeException(msg, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FsPermission perms = status.getPermission();
|
||||||
|
if(!perms.equals(entry.getValue())) {
|
||||||
|
try {
|
||||||
|
lfs.setPermission(entry.getKey(), entry.getValue());
|
||||||
|
}
|
||||||
|
catch(IOException ie) {
|
||||||
|
String msg = "Could not set permissions for local dir " + entry.getKey();
|
||||||
|
LOG.warn(msg, ie);
|
||||||
|
throw new YarnRuntimeException(msg, ie);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeLogDirs(FileContext lfs) {
|
||||||
|
List<String> logDirs = dirsHandler.getLogDirs();
|
||||||
|
for (String logDir : logDirs) {
|
||||||
|
initializeLogDir(lfs, logDir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initializeLogDir(FileContext lfs, String logDir) {
|
||||||
|
try {
|
||||||
|
lfs.mkdir(new Path(logDir), null, true);
|
||||||
|
} catch (FileAlreadyExistsException fe) {
|
||||||
|
// do nothing
|
||||||
|
} catch (IOException e) {
|
||||||
|
String msg = "Could not initialize log dir " + logDir;
|
||||||
|
LOG.warn(msg, e);
|
||||||
|
throw new YarnRuntimeException(msg, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void cleanUpLocalDirs(FileContext lfs, DeletionService del) {
|
||||||
for (String localDir : dirsHandler.getLocalDirs()) {
|
for (String localDir : dirsHandler.getLocalDirs()) {
|
||||||
|
cleanUpLocalDir(lfs, del, localDir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void cleanUpLocalDir(FileContext lfs, DeletionService del,
|
||||||
|
String localDir) {
|
||||||
|
long currentTimeStamp = System.currentTimeMillis();
|
||||||
renameLocalDir(lfs, localDir, ContainerLocalizer.USERCACHE,
|
renameLocalDir(lfs, localDir, ContainerLocalizer.USERCACHE,
|
||||||
currentTimeStamp);
|
currentTimeStamp);
|
||||||
renameLocalDir(lfs, localDir, ContainerLocalizer.FILECACHE,
|
renameLocalDir(lfs, localDir, ContainerLocalizer.FILECACHE,
|
||||||
|
@ -1167,7 +1248,6 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
LOG.warn("Failed to delete localDir: " + localDir);
|
LOG.warn("Failed to delete localDir: " + localDir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
private void renameLocalDir(FileContext lfs, String localDir,
|
private void renameLocalDir(FileContext lfs, String localDir,
|
||||||
String localSubDir, long currentTimeStamp) {
|
String localSubDir, long currentTimeStamp) {
|
||||||
|
@ -1235,4 +1315,94 @@ public class ResourceLocalizationService extends CompositeService
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Synchronized method to get a list of initialized local dirs. Method will
|
||||||
|
* check each local dir to ensure it has been setup correctly and will attempt
|
||||||
|
* to fix any issues it finds.
|
||||||
|
*
|
||||||
|
* @return list of initialized local dirs
|
||||||
|
*/
|
||||||
|
synchronized private List<String> getInitializedLocalDirs() {
|
||||||
|
List<String> dirs = dirsHandler.getLocalDirs();
|
||||||
|
List<String> checkFailedDirs = new ArrayList<String>();
|
||||||
|
for (String dir : dirs) {
|
||||||
|
try {
|
||||||
|
checkLocalDir(dir);
|
||||||
|
} catch (YarnRuntimeException e) {
|
||||||
|
checkFailedDirs.add(dir);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (String dir : checkFailedDirs) {
|
||||||
|
LOG.info("Attempting to initialize " + dir);
|
||||||
|
initializeLocalDir(lfs, dir);
|
||||||
|
try {
|
||||||
|
checkLocalDir(dir);
|
||||||
|
} catch (YarnRuntimeException e) {
|
||||||
|
String msg =
|
||||||
|
"Failed to setup local dir " + dir + ", which was marked as good.";
|
||||||
|
LOG.warn(msg, e);
|
||||||
|
throw new YarnRuntimeException(msg, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dirs;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkLocalDir(String localDir) {
|
||||||
|
|
||||||
|
Map<Path, FsPermission> pathPermissionMap = getLocalDirsPathPermissionsMap(localDir);
|
||||||
|
|
||||||
|
for (Map.Entry<Path, FsPermission> entry : pathPermissionMap.entrySet()) {
|
||||||
|
FileStatus status;
|
||||||
|
try {
|
||||||
|
status = lfs.getFileStatus(entry.getKey());
|
||||||
|
} catch (Exception e) {
|
||||||
|
String msg =
|
||||||
|
"Could not carry out resource dir checks for " + localDir
|
||||||
|
+ ", which was marked as good";
|
||||||
|
LOG.warn(msg, e);
|
||||||
|
throw new YarnRuntimeException(msg, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!status.getPermission().equals(entry.getValue())) {
|
||||||
|
String msg =
|
||||||
|
"Permissions incorrectly set for dir " + entry.getKey()
|
||||||
|
+ ", should be " + entry.getValue() + ", actual value = "
|
||||||
|
+ status.getPermission();
|
||||||
|
LOG.warn(msg);
|
||||||
|
throw new YarnRuntimeException(msg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<Path, FsPermission> getLocalDirsPathPermissionsMap(String localDir) {
|
||||||
|
Map<Path, FsPermission> localDirPathFsPermissionsMap = new HashMap<Path, FsPermission>();
|
||||||
|
|
||||||
|
FsPermission defaultPermission =
|
||||||
|
FsPermission.getDirDefault().applyUMask(lfs.getUMask());
|
||||||
|
FsPermission nmPrivatePermission =
|
||||||
|
NM_PRIVATE_PERM.applyUMask(lfs.getUMask());
|
||||||
|
|
||||||
|
Path userDir = new Path(localDir, ContainerLocalizer.USERCACHE);
|
||||||
|
Path fileDir = new Path(localDir, ContainerLocalizer.FILECACHE);
|
||||||
|
Path sysDir = new Path(localDir, NM_PRIVATE_DIR);
|
||||||
|
|
||||||
|
localDirPathFsPermissionsMap.put(userDir, defaultPermission);
|
||||||
|
localDirPathFsPermissionsMap.put(fileDir, defaultPermission);
|
||||||
|
localDirPathFsPermissionsMap.put(sysDir, nmPrivatePermission);
|
||||||
|
return localDirPathFsPermissionsMap;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Synchronized method to get a list of initialized log dirs. Method will
|
||||||
|
* check each local dir to ensure it has been setup correctly and will attempt
|
||||||
|
* to fix any issues it finds.
|
||||||
|
*
|
||||||
|
* @return list of initialized log dirs
|
||||||
|
*/
|
||||||
|
synchronized private List<String> getInitializedLogDirs() {
|
||||||
|
List<String> dirs = dirsHandler.getLogDirs();
|
||||||
|
initializeLogDirs(lfs);
|
||||||
|
return dirs;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregatio
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.security.PrivilegedExceptionAction;
|
import java.security.PrivilegedExceptionAction;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
@ -37,9 +38,11 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
|
@ -107,6 +110,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
private final AtomicBoolean appAggregationFinished = new AtomicBoolean();
|
private final AtomicBoolean appAggregationFinished = new AtomicBoolean();
|
||||||
private final AtomicBoolean aborted = new AtomicBoolean();
|
private final AtomicBoolean aborted = new AtomicBoolean();
|
||||||
private final Map<ApplicationAccessType, String> appAcls;
|
private final Map<ApplicationAccessType, String> appAcls;
|
||||||
|
private final FileContext lfs;
|
||||||
private final LogAggregationContext logAggregationContext;
|
private final LogAggregationContext logAggregationContext;
|
||||||
private final Context context;
|
private final Context context;
|
||||||
private final int retentionSize;
|
private final int retentionSize;
|
||||||
|
@ -122,8 +126,8 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
LocalDirsHandlerService dirsHandler, Path remoteNodeLogFileForApp,
|
LocalDirsHandlerService dirsHandler, Path remoteNodeLogFileForApp,
|
||||||
ContainerLogsRetentionPolicy retentionPolicy,
|
ContainerLogsRetentionPolicy retentionPolicy,
|
||||||
Map<ApplicationAccessType, String> appAcls,
|
Map<ApplicationAccessType, String> appAcls,
|
||||||
LogAggregationContext logAggregationContext,
|
LogAggregationContext logAggregationContext, Context context,
|
||||||
Context context) {
|
FileContext lfs) {
|
||||||
this.dispatcher = dispatcher;
|
this.dispatcher = dispatcher;
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
this.delService = deletionService;
|
this.delService = deletionService;
|
||||||
|
@ -136,6 +140,7 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
this.retentionPolicy = retentionPolicy;
|
this.retentionPolicy = retentionPolicy;
|
||||||
this.pendingContainers = new LinkedBlockingQueue<ContainerId>();
|
this.pendingContainers = new LinkedBlockingQueue<ContainerId>();
|
||||||
this.appAcls = appAcls;
|
this.appAcls = appAcls;
|
||||||
|
this.lfs = lfs;
|
||||||
this.logAggregationContext = logAggregationContext;
|
this.logAggregationContext = logAggregationContext;
|
||||||
this.context = context;
|
this.context = context;
|
||||||
this.nodeId = nodeId;
|
this.nodeId = nodeId;
|
||||||
|
@ -395,15 +400,25 @@ public class AppLogAggregatorImpl implements AppLogAggregator {
|
||||||
uploadLogsForContainers();
|
uploadLogsForContainers();
|
||||||
|
|
||||||
// Remove the local app-log-dirs
|
// Remove the local app-log-dirs
|
||||||
List<String> rootLogDirs = dirsHandler.getLogDirs();
|
List<Path> localAppLogDirs = new ArrayList<Path>();
|
||||||
Path[] localAppLogDirs = new Path[rootLogDirs.size()];
|
for (String rootLogDir : dirsHandler.getLogDirsForCleanup()) {
|
||||||
int index = 0;
|
Path logPath = new Path(rootLogDir, applicationId);
|
||||||
for (String rootLogDir : rootLogDirs) {
|
try {
|
||||||
localAppLogDirs[index] = new Path(rootLogDir, this.applicationId);
|
// check if log dir exists
|
||||||
index++;
|
lfs.getFileStatus(logPath);
|
||||||
|
localAppLogDirs.add(logPath);
|
||||||
|
} catch (UnsupportedFileSystemException ue) {
|
||||||
|
LOG.warn("Log dir " + rootLogDir + "is an unsupported file system", ue);
|
||||||
|
continue;
|
||||||
|
} catch (IOException fe) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (localAppLogDirs.size() > 0) {
|
||||||
this.delService.delete(this.userUgi.getShortUserName(), null,
|
this.delService.delete(this.userUgi.getShortUserName(), null,
|
||||||
localAppLogDirs);
|
localAppLogDirs.toArray(new Path[localAppLogDirs.size()]));
|
||||||
|
}
|
||||||
|
|
||||||
this.dispatcher.getEventHandler().handle(
|
this.dispatcher.getEventHandler().handle(
|
||||||
new ApplicationEvent(this.appId,
|
new ApplicationEvent(this.appId,
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -327,6 +328,15 @@ public class LogAggregationService extends AbstractService implements
|
||||||
this.dispatcher.getEventHandler().handle(eventResponse);
|
this.dispatcher.getEventHandler().handle(eventResponse);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FileContext getLocalFileContext(Configuration conf) {
|
||||||
|
try {
|
||||||
|
return FileContext.getLocalFSFileContext(conf);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new YarnRuntimeException("Failed to access local fs");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
protected void initAppAggregator(final ApplicationId appId, String user,
|
protected void initAppAggregator(final ApplicationId appId, String user,
|
||||||
Credentials credentials, ContainerLogsRetentionPolicy logRetentionPolicy,
|
Credentials credentials, ContainerLogsRetentionPolicy logRetentionPolicy,
|
||||||
Map<ApplicationAccessType, String> appAcls,
|
Map<ApplicationAccessType, String> appAcls,
|
||||||
|
@ -344,7 +354,8 @@ public class LogAggregationService extends AbstractService implements
|
||||||
new AppLogAggregatorImpl(this.dispatcher, this.deletionService,
|
new AppLogAggregatorImpl(this.dispatcher, this.deletionService,
|
||||||
getConfig(), appId, userUgi, this.nodeId, dirsHandler,
|
getConfig(), appId, userUgi, this.nodeId, dirsHandler,
|
||||||
getRemoteNodeLogFileForApp(appId, user), logRetentionPolicy,
|
getRemoteNodeLogFileForApp(appId, user), logRetentionPolicy,
|
||||||
appAcls, logAggregationContext, this.context);
|
appAcls, logAggregationContext, this.context,
|
||||||
|
getLocalFileContext(getConfig()));
|
||||||
if (this.appLogAggregators.putIfAbsent(appId, appLogAggregator) != null) {
|
if (this.appLogAggregators.putIfAbsent(appId, appLogAggregator) != null) {
|
||||||
throw new YarnRuntimeException("Duplicate initApp for " + appId);
|
throw new YarnRuntimeException("Duplicate initApp for " + appId);
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler;
|
package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
@ -28,11 +30,14 @@ import java.util.concurrent.RejectedExecutionException;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.event.Dispatcher;
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
|
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
||||||
|
@ -97,6 +102,15 @@ public class NonAggregatingLogHandler extends AbstractService implements
|
||||||
super.serviceStop();
|
super.serviceStop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
FileContext getLocalFileContext(Configuration conf) {
|
||||||
|
try {
|
||||||
|
return FileContext.getLocalFSFileContext(conf);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new YarnRuntimeException("Failed to access local fs");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public void handle(LogHandlerEvent event) {
|
public void handle(LogHandlerEvent event) {
|
||||||
|
@ -160,21 +174,30 @@ public class NonAggregatingLogHandler extends AbstractService implements
|
||||||
@Override
|
@Override
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public void run() {
|
public void run() {
|
||||||
List<String> rootLogDirs =
|
List<Path> localAppLogDirs = new ArrayList<Path>();
|
||||||
NonAggregatingLogHandler.this.dirsHandler.getLogDirs();
|
FileContext lfs = getLocalFileContext(getConfig());
|
||||||
Path[] localAppLogDirs = new Path[rootLogDirs.size()];
|
for (String rootLogDir : dirsHandler.getLogDirsForCleanup()) {
|
||||||
int index = 0;
|
Path logDir = new Path(rootLogDir, applicationId.toString());
|
||||||
for (String rootLogDir : rootLogDirs) {
|
try {
|
||||||
localAppLogDirs[index] = new Path(rootLogDir, applicationId.toString());
|
lfs.getFileStatus(logDir);
|
||||||
index++;
|
localAppLogDirs.add(logDir);
|
||||||
|
} catch (UnsupportedFileSystemException ue) {
|
||||||
|
LOG.warn("Unsupported file system used for log dir " + logDir, ue);
|
||||||
|
continue;
|
||||||
|
} catch (IOException ie) {
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Inform the application before the actual delete itself, so that links
|
// Inform the application before the actual delete itself, so that links
|
||||||
// to logs will no longer be there on NM web-UI.
|
// to logs will no longer be there on NM web-UI.
|
||||||
NonAggregatingLogHandler.this.dispatcher.getEventHandler().handle(
|
NonAggregatingLogHandler.this.dispatcher.getEventHandler().handle(
|
||||||
new ApplicationEvent(this.applicationId,
|
new ApplicationEvent(this.applicationId,
|
||||||
ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED));
|
ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED));
|
||||||
|
if (localAppLogDirs.size() > 0) {
|
||||||
NonAggregatingLogHandler.this.delService.delete(user, null,
|
NonAggregatingLogHandler.this.delService.delete(user, null,
|
||||||
localAppLogDirs);
|
(Path[]) localAppLogDirs.toArray(new Path[localAppLogDirs.size()]));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -34,6 +34,8 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
public class TestDirectoryCollection {
|
public class TestDirectoryCollection {
|
||||||
|
@ -42,8 +44,13 @@ public class TestDirectoryCollection {
|
||||||
TestDirectoryCollection.class.getName()).getAbsoluteFile();
|
TestDirectoryCollection.class.getName()).getAbsoluteFile();
|
||||||
private static final File testFile = new File(testDir, "testfile");
|
private static final File testFile = new File(testDir, "testfile");
|
||||||
|
|
||||||
|
private Configuration conf;
|
||||||
|
private FileContext localFs;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setup() throws IOException {
|
public void setupForTests() throws IOException {
|
||||||
|
conf = new Configuration();
|
||||||
|
localFs = FileContext.getLocalFSFileContext(conf);
|
||||||
testDir.mkdirs();
|
testDir.mkdirs();
|
||||||
testFile.createNewFile();
|
testFile.createNewFile();
|
||||||
}
|
}
|
||||||
|
@ -56,10 +63,11 @@ public class TestDirectoryCollection {
|
||||||
@Test
|
@Test
|
||||||
public void testConcurrentAccess() throws IOException {
|
public void testConcurrentAccess() throws IOException {
|
||||||
// Initialize DirectoryCollection with a file instead of a directory
|
// Initialize DirectoryCollection with a file instead of a directory
|
||||||
Configuration conf = new Configuration();
|
|
||||||
String[] dirs = {testFile.getPath()};
|
String[] dirs = {testFile.getPath()};
|
||||||
DirectoryCollection dc = new DirectoryCollection(dirs,
|
DirectoryCollection dc =
|
||||||
conf.getFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
|
new DirectoryCollection(dirs, conf.getFloat(
|
||||||
|
YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
|
||||||
YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE));
|
YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE));
|
||||||
|
|
||||||
// Create an iterator before checkDirs is called to reliable test case
|
// Create an iterator before checkDirs is called to reliable test case
|
||||||
|
@ -78,9 +86,8 @@ public class TestDirectoryCollection {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testCreateDirectories() throws IOException {
|
public void testCreateDirectories() throws IOException {
|
||||||
Configuration conf = new Configuration();
|
|
||||||
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
|
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
|
||||||
FileContext localFs = FileContext.getLocalFSFileContext(conf);
|
|
||||||
|
|
||||||
String dirA = new File(testDir, "dirA").getPath();
|
String dirA = new File(testDir, "dirA").getPath();
|
||||||
String dirB = new File(dirA, "dirB").getPath();
|
String dirB = new File(dirA, "dirB").getPath();
|
||||||
|
@ -92,8 +99,9 @@ public class TestDirectoryCollection {
|
||||||
localFs.setPermission(pathC, permDirC);
|
localFs.setPermission(pathC, permDirC);
|
||||||
|
|
||||||
String[] dirs = { dirA, dirB, dirC };
|
String[] dirs = { dirA, dirB, dirC };
|
||||||
DirectoryCollection dc = new DirectoryCollection(dirs,
|
DirectoryCollection dc =
|
||||||
conf.getFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
|
new DirectoryCollection(dirs, conf.getFloat(
|
||||||
|
YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
|
||||||
YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE));
|
YarnConfiguration.DEFAULT_NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE));
|
||||||
FsPermission defaultPerm = FsPermission.getDefault()
|
FsPermission defaultPerm = FsPermission.getDefault()
|
||||||
.applyUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK));
|
.applyUMask(new FsPermission((short)FsPermission.DEFAULT_UMASK));
|
||||||
|
@ -120,25 +128,29 @@ public class TestDirectoryCollection {
|
||||||
dc.checkDirs();
|
dc.checkDirs();
|
||||||
Assert.assertEquals(0, dc.getGoodDirs().size());
|
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||||
Assert.assertEquals(1, dc.getFailedDirs().size());
|
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||||
|
|
||||||
dc = new DirectoryCollection(dirs, 100.0F);
|
dc = new DirectoryCollection(dirs, 100.0F);
|
||||||
dc.checkDirs();
|
dc.checkDirs();
|
||||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
Assert.assertEquals(0, dc.getFailedDirs().size());
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
|
||||||
dc = new DirectoryCollection(dirs, testDir.getTotalSpace() / (1024 * 1024));
|
dc = new DirectoryCollection(dirs, testDir.getTotalSpace() / (1024 * 1024));
|
||||||
dc.checkDirs();
|
dc.checkDirs();
|
||||||
Assert.assertEquals(0, dc.getGoodDirs().size());
|
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||||
Assert.assertEquals(1, dc.getFailedDirs().size());
|
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||||
|
|
||||||
dc = new DirectoryCollection(dirs, 100.0F, 0);
|
dc = new DirectoryCollection(dirs, 100.0F, 0);
|
||||||
dc.checkDirs();
|
dc.checkDirs();
|
||||||
Assert.assertEquals(1, dc.getGoodDirs().size());
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
Assert.assertEquals(0, dc.getFailedDirs().size());
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDiskLimitsCutoffSetters() {
|
public void testDiskLimitsCutoffSetters() throws IOException {
|
||||||
|
|
||||||
String[] dirs = { "dir" };
|
String[] dirs = { "dir" };
|
||||||
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
|
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F, 100);
|
||||||
|
@ -162,6 +174,47 @@ public class TestDirectoryCollection {
|
||||||
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
|
Assert.assertEquals(0, dc.getDiskUtilizationSpaceCutoff());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFailedDisksBecomingGoodAgain() throws Exception {
|
||||||
|
|
||||||
|
String dirA = new File(testDir, "dirA").getPath();
|
||||||
|
String[] dirs = { dirA };
|
||||||
|
DirectoryCollection dc = new DirectoryCollection(dirs, 0.0F);
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFullDirs().size());
|
||||||
|
|
||||||
|
dc.setDiskUtilizationPercentageCutoff(100.0F);
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
|
||||||
|
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
|
||||||
|
|
||||||
|
String dirB = new File(testDir, "dirB").getPath();
|
||||||
|
Path pathB = new Path(dirB);
|
||||||
|
FsPermission permDirB = new FsPermission((short) 0400);
|
||||||
|
|
||||||
|
localFs.mkdir(pathB, null, true);
|
||||||
|
localFs.setPermission(pathB, permDirB);
|
||||||
|
|
||||||
|
String[] dirs2 = { dirB };
|
||||||
|
|
||||||
|
dc = new DirectoryCollection(dirs2, 100.0F);
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(0, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(1, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
permDirB = new FsPermission((short) 0700);
|
||||||
|
localFs.setPermission(pathB, permDirB);
|
||||||
|
dc.checkDirs();
|
||||||
|
Assert.assertEquals(1, dc.getGoodDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFailedDirs().size());
|
||||||
|
Assert.assertEquals(0, dc.getFullDirs().size());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testConstructors() {
|
public void testConstructors() {
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,13 @@ package org.apache.hadoop.yarn.server.nodemanager;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||||
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.FileUtil;
|
import org.apache.hadoop.fs.FileUtil;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.service.Service.STATE;
|
import org.apache.hadoop.service.Service.STATE;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
|
||||||
|
@ -57,10 +62,11 @@ public class TestLocalDirsHandlerService {
|
||||||
LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
|
LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
|
||||||
dirSvc.init(conf);
|
dirSvc.init(conf);
|
||||||
Assert.assertEquals(1, dirSvc.getLocalDirs().size());
|
Assert.assertEquals(1, dirSvc.getLocalDirs().size());
|
||||||
|
dirSvc.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testValidPathsDirHandlerService() {
|
public void testValidPathsDirHandlerService() throws Exception {
|
||||||
Configuration conf = new YarnConfiguration();
|
Configuration conf = new YarnConfiguration();
|
||||||
String localDir1 = new File("file:///" + testDir, "localDir1").getPath();
|
String localDir1 = new File("file:///" + testDir, "localDir1").getPath();
|
||||||
String localDir2 = new File("hdfs:///" + testDir, "localDir2").getPath();
|
String localDir2 = new File("hdfs:///" + testDir, "localDir2").getPath();
|
||||||
|
@ -76,5 +82,40 @@ public class TestLocalDirsHandlerService {
|
||||||
Assert.assertEquals("Service should not be inited",
|
Assert.assertEquals("Service should not be inited",
|
||||||
STATE.STOPPED,
|
STATE.STOPPED,
|
||||||
dirSvc.getServiceState());
|
dirSvc.getServiceState());
|
||||||
|
dirSvc.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetFullDirs() throws Exception {
|
||||||
|
Configuration conf = new YarnConfiguration();
|
||||||
|
|
||||||
|
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
|
||||||
|
FileContext localFs = FileContext.getLocalFSFileContext(conf);
|
||||||
|
|
||||||
|
String localDir1 = new File(testDir, "localDir1").getPath();
|
||||||
|
String localDir2 = new File(testDir, "localDir2").getPath();
|
||||||
|
String logDir1 = new File(testDir, "logDir1").getPath();
|
||||||
|
String logDir2 = new File(testDir, "logDir2").getPath();
|
||||||
|
Path localDir1Path = new Path(localDir1);
|
||||||
|
Path logDir1Path = new Path(logDir1);
|
||||||
|
FsPermission dirPermissions = new FsPermission((short) 0410);
|
||||||
|
localFs.mkdir(localDir1Path, dirPermissions, true);
|
||||||
|
localFs.mkdir(logDir1Path, dirPermissions, true);
|
||||||
|
|
||||||
|
conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir1 + "," + localDir2);
|
||||||
|
conf.set(YarnConfiguration.NM_LOG_DIRS, logDir1 + "," + logDir2);
|
||||||
|
conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE,
|
||||||
|
0.0f);
|
||||||
|
LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
|
||||||
|
dirSvc.init(conf);
|
||||||
|
Assert.assertEquals(0, dirSvc.getLocalDirs().size());
|
||||||
|
Assert.assertEquals(0, dirSvc.getLogDirs().size());
|
||||||
|
Assert.assertEquals(1, dirSvc.getDiskFullLocalDirs().size());
|
||||||
|
Assert.assertEquals(1, dirSvc.getDiskFullLogDirs().size());
|
||||||
|
FileUtils.deleteDirectory(new File(localDir1));
|
||||||
|
FileUtils.deleteDirectory(new File(localDir2));
|
||||||
|
FileUtils.deleteDirectory(new File(logDir1));
|
||||||
|
FileUtils.deleteDirectory(new File(logDir1));
|
||||||
|
dirSvc.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -196,7 +196,7 @@ public class TestNodeHealthService {
|
||||||
healthStatus.getHealthReport().equals(
|
healthStatus.getHealthReport().equals(
|
||||||
NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG
|
NodeHealthScriptRunner.NODE_HEALTH_SCRIPT_TIMED_OUT_MSG
|
||||||
+ NodeHealthCheckerService.SEPARATOR
|
+ NodeHealthCheckerService.SEPARATOR
|
||||||
+ nodeHealthChecker.getDiskHandler().getDisksHealthReport()));
|
+ nodeHealthChecker.getDiskHandler().getDisksHealthReport(false)));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,6 +42,7 @@ import static org.mockito.Mockito.verify;
|
||||||
import static org.mockito.Mockito.verifyNoMoreInteractions;
|
import static org.mockito.Mockito.verifyNoMoreInteractions;
|
||||||
import static org.mockito.Mockito.when;
|
import static org.mockito.Mockito.when;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
|
@ -68,13 +69,16 @@ import org.apache.hadoop.fs.UnresolvedLinkException;
|
||||||
import org.apache.hadoop.security.AccessControlException;
|
import org.apache.hadoop.security.AccessControlException;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.AbstractFileSystem;
|
import org.apache.hadoop.fs.AbstractFileSystem;
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
import org.apache.hadoop.fs.CommonConfigurationKeys;
|
||||||
import org.apache.hadoop.fs.FSDataOutputStream;
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Options.ChecksumOpt;
|
import org.apache.hadoop.fs.Options.ChecksumOpt;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
import org.apache.hadoop.fs.permission.FsPermission;
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
import org.apache.hadoop.io.DataOutputBuffer;
|
import org.apache.hadoop.io.DataOutputBuffer;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
|
@ -167,15 +171,15 @@ public class TestResourceLocalizationService {
|
||||||
conf = new Configuration();
|
conf = new Configuration();
|
||||||
spylfs = spy(FileContext.getLocalFSFileContext().getDefaultFileSystem());
|
spylfs = spy(FileContext.getLocalFSFileContext().getDefaultFileSystem());
|
||||||
lfs = FileContext.getFileContext(spylfs, conf);
|
lfs = FileContext.getFileContext(spylfs, conf);
|
||||||
doNothing().when(spylfs).mkdir(
|
|
||||||
isA(Path.class), isA(FsPermission.class), anyBoolean());
|
|
||||||
String logDir = lfs.makeQualified(new Path(basedir, "logdir ")).toString();
|
String logDir = lfs.makeQualified(new Path(basedir, "logdir ")).toString();
|
||||||
conf.set(YarnConfiguration.NM_LOG_DIRS, logDir);
|
conf.set(YarnConfiguration.NM_LOG_DIRS, logDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void cleanup() {
|
public void cleanup() throws IOException {
|
||||||
conf = null;
|
conf = null;
|
||||||
|
FileUtils.deleteDirectory(new File(basedir.toString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -752,6 +756,39 @@ public class TestResourceLocalizationService {
|
||||||
ResourceLocalizationService spyService = spy(rawService);
|
ResourceLocalizationService spyService = spy(rawService);
|
||||||
doReturn(mockServer).when(spyService).createServer();
|
doReturn(mockServer).when(spyService).createServer();
|
||||||
doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
|
doReturn(lfs).when(spyService).getLocalFileContext(isA(Configuration.class));
|
||||||
|
FsPermission defaultPermission =
|
||||||
|
FsPermission.getDirDefault().applyUMask(lfs.getUMask());
|
||||||
|
FsPermission nmPermission =
|
||||||
|
ResourceLocalizationService.NM_PRIVATE_PERM.applyUMask(lfs.getUMask());
|
||||||
|
final Path userDir =
|
||||||
|
new Path(sDirs[0].substring("file:".length()),
|
||||||
|
ContainerLocalizer.USERCACHE);
|
||||||
|
final Path fileDir =
|
||||||
|
new Path(sDirs[0].substring("file:".length()),
|
||||||
|
ContainerLocalizer.FILECACHE);
|
||||||
|
final Path sysDir =
|
||||||
|
new Path(sDirs[0].substring("file:".length()),
|
||||||
|
ResourceLocalizationService.NM_PRIVATE_DIR);
|
||||||
|
final FileStatus fs =
|
||||||
|
new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
|
||||||
|
defaultPermission, "", "", new Path(sDirs[0]));
|
||||||
|
final FileStatus nmFs =
|
||||||
|
new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
|
||||||
|
nmPermission, "", "", sysDir);
|
||||||
|
|
||||||
|
doAnswer(new Answer<FileStatus>() {
|
||||||
|
@Override
|
||||||
|
public FileStatus answer(InvocationOnMock invocation) throws Throwable {
|
||||||
|
Object[] args = invocation.getArguments();
|
||||||
|
if (args.length > 0) {
|
||||||
|
if (args[0].equals(userDir) || args[0].equals(fileDir)) {
|
||||||
|
return fs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nmFs;
|
||||||
|
}
|
||||||
|
}).when(spylfs).getFileStatus(isA(Path.class));
|
||||||
|
|
||||||
try {
|
try {
|
||||||
spyService.init(conf);
|
spyService.init(conf);
|
||||||
spyService.start();
|
spyService.start();
|
||||||
|
@ -1776,4 +1813,273 @@ public class TestResourceLocalizationService {
|
||||||
new Text("kind" + id), new Text("service" + id));
|
new Text("kind" + id), new Text("service" + id));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test to ensure ResourceLocalizationService can handle local dirs going bad.
|
||||||
|
* Test first sets up all the components required, then sends events to fetch
|
||||||
|
* a private, app and public resource. It then sends events to clean up the
|
||||||
|
* container and the app and ensures the right delete calls were made.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
// mocked generics
|
||||||
|
public void testFailedDirsResourceRelease() throws Exception {
|
||||||
|
// setup components
|
||||||
|
File f = new File(basedir.toString());
|
||||||
|
String[] sDirs = new String[4];
|
||||||
|
List<Path> localDirs = new ArrayList<Path>(sDirs.length);
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
sDirs[i] = f.getAbsolutePath() + i;
|
||||||
|
localDirs.add(new Path(sDirs[i]));
|
||||||
|
}
|
||||||
|
List<Path> containerLocalDirs = new ArrayList<Path>(localDirs.size());
|
||||||
|
List<Path> appLocalDirs = new ArrayList<Path>(localDirs.size());
|
||||||
|
List<Path> nmLocalContainerDirs = new ArrayList<Path>(localDirs.size());
|
||||||
|
List<Path> nmLocalAppDirs = new ArrayList<Path>(localDirs.size());
|
||||||
|
conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, sDirs);
|
||||||
|
conf.setLong(YarnConfiguration.NM_DISK_HEALTH_CHECK_INTERVAL_MS, 500);
|
||||||
|
|
||||||
|
LocalizerTracker mockLocallilzerTracker = mock(LocalizerTracker.class);
|
||||||
|
DrainDispatcher dispatcher = new DrainDispatcher();
|
||||||
|
dispatcher.init(conf);
|
||||||
|
dispatcher.start();
|
||||||
|
EventHandler<ApplicationEvent> applicationBus = mock(EventHandler.class);
|
||||||
|
dispatcher.register(ApplicationEventType.class, applicationBus);
|
||||||
|
EventHandler<ContainerEvent> containerBus = mock(EventHandler.class);
|
||||||
|
dispatcher.register(ContainerEventType.class, containerBus);
|
||||||
|
// Ignore actual localization
|
||||||
|
EventHandler<LocalizerEvent> localizerBus = mock(EventHandler.class);
|
||||||
|
dispatcher.register(LocalizerEventType.class, localizerBus);
|
||||||
|
|
||||||
|
ContainerExecutor exec = mock(ContainerExecutor.class);
|
||||||
|
LocalDirsHandlerService mockDirsHandler =
|
||||||
|
mock(LocalDirsHandlerService.class);
|
||||||
|
doReturn(new ArrayList<String>(Arrays.asList(sDirs))).when(
|
||||||
|
mockDirsHandler).getLocalDirsForCleanup();
|
||||||
|
|
||||||
|
DeletionService delService = mock(DeletionService.class);
|
||||||
|
|
||||||
|
// setup mocks
|
||||||
|
ResourceLocalizationService rawService =
|
||||||
|
new ResourceLocalizationService(dispatcher, exec, delService,
|
||||||
|
mockDirsHandler, new NMNullStateStoreService());
|
||||||
|
ResourceLocalizationService spyService = spy(rawService);
|
||||||
|
doReturn(mockServer).when(spyService).createServer();
|
||||||
|
doReturn(mockLocallilzerTracker).when(spyService).createLocalizerTracker(
|
||||||
|
isA(Configuration.class));
|
||||||
|
doReturn(lfs).when(spyService)
|
||||||
|
.getLocalFileContext(isA(Configuration.class));
|
||||||
|
FsPermission defaultPermission =
|
||||||
|
FsPermission.getDirDefault().applyUMask(lfs.getUMask());
|
||||||
|
FsPermission nmPermission =
|
||||||
|
ResourceLocalizationService.NM_PRIVATE_PERM.applyUMask(lfs.getUMask());
|
||||||
|
final FileStatus fs =
|
||||||
|
new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
|
||||||
|
defaultPermission, "", "", localDirs.get(0));
|
||||||
|
final FileStatus nmFs =
|
||||||
|
new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
|
||||||
|
nmPermission, "", "", localDirs.get(0));
|
||||||
|
|
||||||
|
final String user = "user0";
|
||||||
|
// init application
|
||||||
|
final Application app = mock(Application.class);
|
||||||
|
final ApplicationId appId =
|
||||||
|
BuilderUtils.newApplicationId(314159265358979L, 3);
|
||||||
|
when(app.getUser()).thenReturn(user);
|
||||||
|
when(app.getAppId()).thenReturn(appId);
|
||||||
|
when(app.toString()).thenReturn(ConverterUtils.toString(appId));
|
||||||
|
|
||||||
|
// init container.
|
||||||
|
final Container c = getMockContainer(appId, 42, user);
|
||||||
|
|
||||||
|
// setup local app dirs
|
||||||
|
List<String> tmpDirs = mockDirsHandler.getLocalDirs();
|
||||||
|
for (int i = 0; i < tmpDirs.size(); ++i) {
|
||||||
|
Path usersdir = new Path(tmpDirs.get(i), ContainerLocalizer.USERCACHE);
|
||||||
|
Path userdir = new Path(usersdir, user);
|
||||||
|
Path allAppsdir = new Path(userdir, ContainerLocalizer.APPCACHE);
|
||||||
|
Path appDir = new Path(allAppsdir, ConverterUtils.toString(appId));
|
||||||
|
Path containerDir =
|
||||||
|
new Path(appDir, ConverterUtils.toString(c.getContainerId()));
|
||||||
|
containerLocalDirs.add(containerDir);
|
||||||
|
appLocalDirs.add(appDir);
|
||||||
|
|
||||||
|
Path sysDir =
|
||||||
|
new Path(tmpDirs.get(i), ResourceLocalizationService.NM_PRIVATE_DIR);
|
||||||
|
Path appSysDir = new Path(sysDir, ConverterUtils.toString(appId));
|
||||||
|
Path containerSysDir =
|
||||||
|
new Path(appSysDir, ConverterUtils.toString(c.getContainerId()));
|
||||||
|
|
||||||
|
nmLocalContainerDirs.add(containerSysDir);
|
||||||
|
nmLocalAppDirs.add(appSysDir);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
spyService.init(conf);
|
||||||
|
spyService.start();
|
||||||
|
|
||||||
|
spyService.handle(new ApplicationLocalizationEvent(
|
||||||
|
LocalizationEventType.INIT_APPLICATION_RESOURCES, app));
|
||||||
|
dispatcher.await();
|
||||||
|
|
||||||
|
// Get a handle on the trackers after they're setup with
|
||||||
|
// INIT_APP_RESOURCES
|
||||||
|
LocalResourcesTracker appTracker =
|
||||||
|
spyService.getLocalResourcesTracker(
|
||||||
|
LocalResourceVisibility.APPLICATION, user, appId);
|
||||||
|
LocalResourcesTracker privTracker =
|
||||||
|
spyService.getLocalResourcesTracker(LocalResourceVisibility.PRIVATE,
|
||||||
|
user, appId);
|
||||||
|
LocalResourcesTracker pubTracker =
|
||||||
|
spyService.getLocalResourcesTracker(LocalResourceVisibility.PUBLIC,
|
||||||
|
user, appId);
|
||||||
|
|
||||||
|
// init resources
|
||||||
|
Random r = new Random();
|
||||||
|
long seed = r.nextLong();
|
||||||
|
r.setSeed(seed);
|
||||||
|
|
||||||
|
// Send localization requests, one for each type of resource
|
||||||
|
final LocalResource privResource = getPrivateMockedResource(r);
|
||||||
|
final LocalResourceRequest privReq =
|
||||||
|
new LocalResourceRequest(privResource);
|
||||||
|
|
||||||
|
final LocalResource appResource = getAppMockedResource(r);
|
||||||
|
final LocalResourceRequest appReq = new LocalResourceRequest(appResource);
|
||||||
|
|
||||||
|
final LocalResource pubResource = getPublicMockedResource(r);
|
||||||
|
final LocalResourceRequest pubReq = new LocalResourceRequest(pubResource);
|
||||||
|
|
||||||
|
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req =
|
||||||
|
new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
|
||||||
|
req.put(LocalResourceVisibility.PRIVATE,
|
||||||
|
Collections.singletonList(privReq));
|
||||||
|
req.put(LocalResourceVisibility.APPLICATION,
|
||||||
|
Collections.singletonList(appReq));
|
||||||
|
req
|
||||||
|
.put(LocalResourceVisibility.PUBLIC, Collections.singletonList(pubReq));
|
||||||
|
|
||||||
|
Map<LocalResourceVisibility, Collection<LocalResourceRequest>> req2 =
|
||||||
|
new HashMap<LocalResourceVisibility, Collection<LocalResourceRequest>>();
|
||||||
|
req2.put(LocalResourceVisibility.PRIVATE,
|
||||||
|
Collections.singletonList(privReq));
|
||||||
|
|
||||||
|
// Send Request event
|
||||||
|
spyService.handle(new ContainerLocalizationRequestEvent(c, req));
|
||||||
|
spyService.handle(new ContainerLocalizationRequestEvent(c, req2));
|
||||||
|
dispatcher.await();
|
||||||
|
|
||||||
|
int privRsrcCount = 0;
|
||||||
|
for (LocalizedResource lr : privTracker) {
|
||||||
|
privRsrcCount++;
|
||||||
|
Assert.assertEquals("Incorrect reference count", 2, lr.getRefCount());
|
||||||
|
Assert.assertEquals(privReq, lr.getRequest());
|
||||||
|
}
|
||||||
|
Assert.assertEquals(1, privRsrcCount);
|
||||||
|
|
||||||
|
int appRsrcCount = 0;
|
||||||
|
for (LocalizedResource lr : appTracker) {
|
||||||
|
appRsrcCount++;
|
||||||
|
Assert.assertEquals("Incorrect reference count", 1, lr.getRefCount());
|
||||||
|
Assert.assertEquals(appReq, lr.getRequest());
|
||||||
|
}
|
||||||
|
Assert.assertEquals(1, appRsrcCount);
|
||||||
|
|
||||||
|
int pubRsrcCount = 0;
|
||||||
|
for (LocalizedResource lr : pubTracker) {
|
||||||
|
pubRsrcCount++;
|
||||||
|
Assert.assertEquals("Incorrect reference count", 1, lr.getRefCount());
|
||||||
|
Assert.assertEquals(pubReq, lr.getRequest());
|
||||||
|
}
|
||||||
|
Assert.assertEquals(1, pubRsrcCount);
|
||||||
|
|
||||||
|
// setup mocks for test, a set of dirs with IOExceptions and let the rest
|
||||||
|
// go through
|
||||||
|
for (int i = 0; i < containerLocalDirs.size(); ++i) {
|
||||||
|
if (i == 2) {
|
||||||
|
Mockito.doThrow(new IOException()).when(spylfs)
|
||||||
|
.getFileStatus(eq(containerLocalDirs.get(i)));
|
||||||
|
Mockito.doThrow(new IOException()).when(spylfs)
|
||||||
|
.getFileStatus(eq(nmLocalContainerDirs.get(i)));
|
||||||
|
} else {
|
||||||
|
doReturn(fs).when(spylfs)
|
||||||
|
.getFileStatus(eq(containerLocalDirs.get(i)));
|
||||||
|
doReturn(nmFs).when(spylfs).getFileStatus(
|
||||||
|
eq(nmLocalContainerDirs.get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send Cleanup Event
|
||||||
|
spyService.handle(new ContainerLocalizationCleanupEvent(c, req));
|
||||||
|
verify(mockLocallilzerTracker).cleanupPrivLocalizers(
|
||||||
|
"container_314159265358979_0003_01_000042");
|
||||||
|
|
||||||
|
// match cleanup events with the mocks we setup earlier
|
||||||
|
for (int i = 0; i < containerLocalDirs.size(); ++i) {
|
||||||
|
if (i == 2) {
|
||||||
|
try {
|
||||||
|
verify(delService).delete(user, containerLocalDirs.get(i));
|
||||||
|
verify(delService).delete(null, nmLocalContainerDirs.get(i));
|
||||||
|
Assert.fail("deletion attempts for invalid dirs");
|
||||||
|
} catch (Throwable e) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
verify(delService).delete(user, containerLocalDirs.get(i));
|
||||||
|
verify(delService).delete(null, nmLocalContainerDirs.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ArgumentMatcher<ApplicationEvent> matchesAppDestroy =
|
||||||
|
new ArgumentMatcher<ApplicationEvent>() {
|
||||||
|
@Override
|
||||||
|
public boolean matches(Object o) {
|
||||||
|
ApplicationEvent evt = (ApplicationEvent) o;
|
||||||
|
return (evt.getType() == ApplicationEventType.APPLICATION_RESOURCES_CLEANEDUP)
|
||||||
|
&& appId == evt.getApplicationID();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
dispatcher.await();
|
||||||
|
|
||||||
|
// setup mocks again, this time throw UnsupportedFileSystemException and
|
||||||
|
// IOExceptions
|
||||||
|
for (int i = 0; i < containerLocalDirs.size(); ++i) {
|
||||||
|
if (i == 3) {
|
||||||
|
Mockito.doThrow(new IOException()).when(spylfs)
|
||||||
|
.getFileStatus(eq(appLocalDirs.get(i)));
|
||||||
|
Mockito.doThrow(new UnsupportedFileSystemException("test"))
|
||||||
|
.when(spylfs).getFileStatus(eq(nmLocalAppDirs.get(i)));
|
||||||
|
} else {
|
||||||
|
doReturn(fs).when(spylfs).getFileStatus(eq(appLocalDirs.get(i)));
|
||||||
|
doReturn(nmFs).when(spylfs).getFileStatus(eq(nmLocalAppDirs.get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LocalizationEvent destroyApp =
|
||||||
|
new ApplicationLocalizationEvent(
|
||||||
|
LocalizationEventType.DESTROY_APPLICATION_RESOURCES, app);
|
||||||
|
spyService.handle(destroyApp);
|
||||||
|
verify(applicationBus).handle(argThat(matchesAppDestroy));
|
||||||
|
|
||||||
|
// verify we got the right delete calls
|
||||||
|
for (int i = 0; i < containerLocalDirs.size(); ++i) {
|
||||||
|
if (i == 3) {
|
||||||
|
try {
|
||||||
|
verify(delService).delete(user, containerLocalDirs.get(i));
|
||||||
|
verify(delService).delete(null, nmLocalContainerDirs.get(i));
|
||||||
|
Assert.fail("deletion attempts for invalid dirs");
|
||||||
|
} catch (Throwable e) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
verify(delService).delete(user, appLocalDirs.get(i));
|
||||||
|
verify(delService).delete(null, nmLocalAppDirs.get(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
dispatcher.stop();
|
||||||
|
delService.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,6 +25,7 @@ import static org.mockito.Matchers.anyMap;
|
||||||
import static org.mockito.Matchers.eq;
|
import static org.mockito.Matchers.eq;
|
||||||
import static org.mockito.Matchers.isA;
|
import static org.mockito.Matchers.isA;
|
||||||
import static org.mockito.Mockito.atLeast;
|
import static org.mockito.Mockito.atLeast;
|
||||||
|
import static org.mockito.Mockito.doReturn;
|
||||||
import static org.mockito.Mockito.doThrow;
|
import static org.mockito.Mockito.doThrow;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
import static org.mockito.Mockito.never;
|
import static org.mockito.Mockito.never;
|
||||||
|
@ -58,6 +59,8 @@ import org.junit.Assert;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.AbstractFileSystem;
|
||||||
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.FileContext;
|
import org.apache.hadoop.fs.FileContext;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.FileSystem;
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
@ -105,6 +108,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerM
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
|
||||||
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.TestNonAggregatingLogHandler;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
|
||||||
|
@ -137,11 +141,18 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
this.remoteRootLogDir.mkdir();
|
this.remoteRootLogDir.mkdir();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DrainDispatcher dispatcher;
|
||||||
|
EventHandler<ApplicationEvent> appEventHandler;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
public void setup() throws IOException {
|
public void setup() throws IOException {
|
||||||
super.setup();
|
super.setup();
|
||||||
NodeId nodeId = NodeId.newInstance("0.0.0.0", 5555);
|
NodeId nodeId = NodeId.newInstance("0.0.0.0", 5555);
|
||||||
((NMContext)context).setNodeId(nodeId);
|
((NMContext)context).setNodeId(nodeId);
|
||||||
|
dispatcher = createDispatcher();
|
||||||
|
appEventHandler = mock(EventHandler.class);
|
||||||
|
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -149,10 +160,12 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
super.tearDown();
|
super.tearDown();
|
||||||
createContainerExecutor().deleteAsUser(user,
|
createContainerExecutor().deleteAsUser(user,
|
||||||
new Path(remoteRootLogDir.getAbsolutePath()), new Path[] {});
|
new Path(remoteRootLogDir.getAbsolutePath()), new Path[] {});
|
||||||
|
dispatcher.await();
|
||||||
|
dispatcher.stop();
|
||||||
|
dispatcher.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testLocalFileDeletionAfterUpload() throws Exception {
|
public void testLocalFileDeletionAfterUpload() throws Exception {
|
||||||
this.delSrvc = new DeletionService(createContainerExecutor());
|
this.delSrvc = new DeletionService(createContainerExecutor());
|
||||||
delSrvc = spy(delSrvc);
|
delSrvc = spy(delSrvc);
|
||||||
|
@ -161,10 +174,6 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
||||||
this.remoteRootLogDir.getAbsolutePath());
|
this.remoteRootLogDir.getAbsolutePath());
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LogAggregationService logAggregationService = spy(
|
LogAggregationService logAggregationService = spy(
|
||||||
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
||||||
super.dirsHandler));
|
super.dirsHandler));
|
||||||
|
@ -236,16 +245,11 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testNoContainerOnNode() throws Exception {
|
public void testNoContainerOnNode() throws Exception {
|
||||||
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
|
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
|
||||||
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
||||||
this.remoteRootLogDir.getAbsolutePath());
|
this.remoteRootLogDir.getAbsolutePath());
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LogAggregationService logAggregationService =
|
LogAggregationService logAggregationService =
|
||||||
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
||||||
super.dirsHandler);
|
super.dirsHandler);
|
||||||
|
@ -285,6 +289,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
};
|
};
|
||||||
checkEvents(appEventHandler, expectedEvents, true, "getType", "getApplicationID");
|
checkEvents(appEventHandler, expectedEvents, true, "getType", "getApplicationID");
|
||||||
dispatcher.stop();
|
dispatcher.stop();
|
||||||
|
logAggregationService.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -294,6 +299,7 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
|
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
|
||||||
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
||||||
this.remoteRootLogDir.getAbsolutePath());
|
this.remoteRootLogDir.getAbsolutePath());
|
||||||
|
|
||||||
String[] fileNames = new String[] { "stdout", "stderr", "syslog" };
|
String[] fileNames = new String[] { "stdout", "stderr", "syslog" };
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
DrainDispatcher dispatcher = createDispatcher();
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
||||||
|
@ -432,17 +438,12 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testVerifyAndCreateRemoteDirsFailure()
|
public void testVerifyAndCreateRemoteDirsFailure()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
|
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
|
||||||
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
||||||
this.remoteRootLogDir.getAbsolutePath());
|
this.remoteRootLogDir.getAbsolutePath());
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LogAggregationService logAggregationService = spy(
|
LogAggregationService logAggregationService = spy(
|
||||||
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
||||||
super.dirsHandler));
|
super.dirsHandler));
|
||||||
|
@ -456,8 +457,9 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
logAggregationService.start();
|
logAggregationService.start();
|
||||||
|
|
||||||
// Now try to start an application
|
// Now try to start an application
|
||||||
ApplicationId appId = BuilderUtils.newApplicationId(
|
ApplicationId appId =
|
||||||
System.currentTimeMillis(), (int)Math.random());
|
BuilderUtils.newApplicationId(System.currentTimeMillis(),
|
||||||
|
(int) (Math.random() * 1000));
|
||||||
logAggregationService.handle(new LogHandlerAppStartedEvent(appId,
|
logAggregationService.handle(new LogHandlerAppStartedEvent(appId,
|
||||||
this.user, null,
|
this.user, null,
|
||||||
ContainerLogsRetentionPolicy.AM_AND_FAILED_CONTAINERS_ONLY,
|
ContainerLogsRetentionPolicy.AM_AND_FAILED_CONTAINERS_ONLY,
|
||||||
|
@ -475,8 +477,9 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
Mockito.reset(logAggregationService);
|
Mockito.reset(logAggregationService);
|
||||||
|
|
||||||
// Now try to start another one
|
// Now try to start another one
|
||||||
ApplicationId appId2 = BuilderUtils.newApplicationId(
|
ApplicationId appId2 =
|
||||||
System.currentTimeMillis(), (int)Math.random());
|
BuilderUtils.newApplicationId(System.currentTimeMillis(),
|
||||||
|
(int) (Math.random() * 1000));
|
||||||
File appLogDir =
|
File appLogDir =
|
||||||
new File(localLogDir, ConverterUtils.toString(appId2));
|
new File(localLogDir, ConverterUtils.toString(appId2));
|
||||||
appLogDir.mkdir();
|
appLogDir.mkdir();
|
||||||
|
@ -578,6 +581,8 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
aggSvc.handle(new LogHandlerAppStartedEvent(appId3, this.user, null,
|
aggSvc.handle(new LogHandlerAppStartedEvent(appId3, this.user, null,
|
||||||
ContainerLogsRetentionPolicy.ALL_CONTAINERS, this.acls));
|
ContainerLogsRetentionPolicy.ALL_CONTAINERS, this.acls));
|
||||||
verify(spyFs, never()).mkdirs(eq(appDir3), isA(FsPermission.class));
|
verify(spyFs, never()).mkdirs(eq(appDir3), isA(FsPermission.class));
|
||||||
|
aggSvc.stop();
|
||||||
|
aggSvc.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -589,18 +594,15 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
||||||
this.remoteRootLogDir.getAbsolutePath());
|
this.remoteRootLogDir.getAbsolutePath());
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LogAggregationService logAggregationService = spy(
|
LogAggregationService logAggregationService = spy(
|
||||||
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
||||||
super.dirsHandler));
|
super.dirsHandler));
|
||||||
logAggregationService.init(this.conf);
|
logAggregationService.init(this.conf);
|
||||||
logAggregationService.start();
|
logAggregationService.start();
|
||||||
|
|
||||||
ApplicationId appId = BuilderUtils.newApplicationId(
|
ApplicationId appId =
|
||||||
System.currentTimeMillis(), (int)Math.random());
|
BuilderUtils.newApplicationId(System.currentTimeMillis(),
|
||||||
|
(int) (Math.random() * 1000));
|
||||||
doThrow(new YarnRuntimeException("KABOOM!"))
|
doThrow(new YarnRuntimeException("KABOOM!"))
|
||||||
.when(logAggregationService).initAppAggregator(
|
.when(logAggregationService).initAppAggregator(
|
||||||
eq(appId), eq(user), any(Credentials.class),
|
eq(appId), eq(user), any(Credentials.class),
|
||||||
|
@ -634,7 +636,6 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testLogAggregationCreateDirsFailsWithoutKillingNM()
|
public void testLogAggregationCreateDirsFailsWithoutKillingNM()
|
||||||
throws Exception {
|
throws Exception {
|
||||||
|
|
||||||
|
@ -642,18 +643,15 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
||||||
this.remoteRootLogDir.getAbsolutePath());
|
this.remoteRootLogDir.getAbsolutePath());
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LogAggregationService logAggregationService = spy(
|
LogAggregationService logAggregationService = spy(
|
||||||
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
new LogAggregationService(dispatcher, this.context, this.delSrvc,
|
||||||
super.dirsHandler));
|
super.dirsHandler));
|
||||||
logAggregationService.init(this.conf);
|
logAggregationService.init(this.conf);
|
||||||
logAggregationService.start();
|
logAggregationService.start();
|
||||||
|
|
||||||
ApplicationId appId = BuilderUtils.newApplicationId(
|
ApplicationId appId =
|
||||||
System.currentTimeMillis(), (int)Math.random());
|
BuilderUtils.newApplicationId(System.currentTimeMillis(),
|
||||||
|
(int) (Math.random() * 1000));
|
||||||
Exception e = new RuntimeException("KABOOM!");
|
Exception e = new RuntimeException("KABOOM!");
|
||||||
doThrow(e)
|
doThrow(e)
|
||||||
.when(logAggregationService).createAppDir(any(String.class),
|
.when(logAggregationService).createAppDir(any(String.class),
|
||||||
|
@ -905,7 +903,6 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=20000)
|
@Test(timeout=20000)
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testStopAfterError() throws Exception {
|
public void testStopAfterError() throws Exception {
|
||||||
DeletionService delSrvc = mock(DeletionService.class);
|
DeletionService delSrvc = mock(DeletionService.class);
|
||||||
|
|
||||||
|
@ -913,10 +910,6 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
LocalDirsHandlerService mockedDirSvc = mock(LocalDirsHandlerService.class);
|
LocalDirsHandlerService mockedDirSvc = mock(LocalDirsHandlerService.class);
|
||||||
when(mockedDirSvc.getLogDirs()).thenThrow(new RuntimeException());
|
when(mockedDirSvc.getLogDirs()).thenThrow(new RuntimeException());
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LogAggregationService logAggregationService =
|
LogAggregationService logAggregationService =
|
||||||
new LogAggregationService(dispatcher, this.context, delSrvc,
|
new LogAggregationService(dispatcher, this.context, delSrvc,
|
||||||
mockedDirSvc);
|
mockedDirSvc);
|
||||||
|
@ -930,20 +923,16 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
|
|
||||||
logAggregationService.stop();
|
logAggregationService.stop();
|
||||||
assertEquals(0, logAggregationService.getNumAggregators());
|
assertEquals(0, logAggregationService.getNumAggregators());
|
||||||
|
logAggregationService.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("unchecked")
|
|
||||||
public void testLogAggregatorCleanup() throws Exception {
|
public void testLogAggregatorCleanup() throws Exception {
|
||||||
DeletionService delSrvc = mock(DeletionService.class);
|
DeletionService delSrvc = mock(DeletionService.class);
|
||||||
|
|
||||||
// get the AppLogAggregationImpl thread to crash
|
// get the AppLogAggregationImpl thread to crash
|
||||||
LocalDirsHandlerService mockedDirSvc = mock(LocalDirsHandlerService.class);
|
LocalDirsHandlerService mockedDirSvc = mock(LocalDirsHandlerService.class);
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher();
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LogAggregationService logAggregationService =
|
LogAggregationService logAggregationService =
|
||||||
new LogAggregationService(dispatcher, this.context, delSrvc,
|
new LogAggregationService(dispatcher, this.context, delSrvc,
|
||||||
mockedDirSvc);
|
mockedDirSvc);
|
||||||
|
@ -964,6 +953,8 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
}
|
}
|
||||||
Assert.assertEquals("Log aggregator failed to cleanup!", 0,
|
Assert.assertEquals("Log aggregator failed to cleanup!", 0,
|
||||||
logAggregationService.getNumAggregators());
|
logAggregationService.getNumAggregators());
|
||||||
|
logAggregationService.stop();
|
||||||
|
logAggregationService.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
|
@ -1039,6 +1030,72 @@ public class TestLogAggregationService extends BaseContainerManagerTest {
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test to make sure we handle cases where the directories we get back from
|
||||||
|
* the LocalDirsHandler may have issues including the log dir not being
|
||||||
|
* present as well as other issues. The test uses helper functions from
|
||||||
|
* TestNonAggregatingLogHandler.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFailedDirsLocalFileDeletionAfterUpload() throws Exception {
|
||||||
|
|
||||||
|
// setup conf and services
|
||||||
|
DeletionService mockDelService = mock(DeletionService.class);
|
||||||
|
File[] localLogDirs =
|
||||||
|
TestNonAggregatingLogHandler.getLocalLogDirFiles(this.getClass()
|
||||||
|
.getName(), 7);
|
||||||
|
final List<String> localLogDirPaths =
|
||||||
|
new ArrayList<String>(localLogDirs.length);
|
||||||
|
for (int i = 0; i < localLogDirs.length; i++) {
|
||||||
|
localLogDirPaths.add(localLogDirs[i].getAbsolutePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
String localLogDirsString = StringUtils.join(localLogDirPaths, ",");
|
||||||
|
|
||||||
|
this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDirsString);
|
||||||
|
this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR,
|
||||||
|
this.remoteRootLogDir.getAbsolutePath());
|
||||||
|
this.conf.setLong(YarnConfiguration.NM_DISK_HEALTH_CHECK_INTERVAL_MS, 500);
|
||||||
|
|
||||||
|
ApplicationId application1 = BuilderUtils.newApplicationId(1234, 1);
|
||||||
|
ApplicationAttemptId appAttemptId =
|
||||||
|
BuilderUtils.newApplicationAttemptId(application1, 1);
|
||||||
|
|
||||||
|
this.dirsHandler = new LocalDirsHandlerService();
|
||||||
|
LocalDirsHandlerService mockDirsHandler = mock(LocalDirsHandlerService.class);
|
||||||
|
|
||||||
|
LogAggregationService logAggregationService =
|
||||||
|
spy(new LogAggregationService(dispatcher, this.context, mockDelService,
|
||||||
|
mockDirsHandler));
|
||||||
|
AbstractFileSystem spylfs =
|
||||||
|
spy(FileContext.getLocalFSFileContext().getDefaultFileSystem());
|
||||||
|
FileContext lfs = FileContext.getFileContext(spylfs, conf);
|
||||||
|
doReturn(lfs).when(logAggregationService).getLocalFileContext(
|
||||||
|
isA(Configuration.class));
|
||||||
|
|
||||||
|
logAggregationService.init(this.conf);
|
||||||
|
logAggregationService.start();
|
||||||
|
|
||||||
|
TestNonAggregatingLogHandler.runMockedFailedDirs(logAggregationService,
|
||||||
|
application1, user, mockDelService, mockDirsHandler, conf, spylfs, lfs,
|
||||||
|
localLogDirs);
|
||||||
|
|
||||||
|
logAggregationService.stop();
|
||||||
|
assertEquals(0, logAggregationService.getNumAggregators());
|
||||||
|
verify(logAggregationService).closeFileSystems(
|
||||||
|
any(UserGroupInformation.class));
|
||||||
|
|
||||||
|
ApplicationEvent expectedEvents[] =
|
||||||
|
new ApplicationEvent[] {
|
||||||
|
new ApplicationEvent(appAttemptId.getApplicationId(),
|
||||||
|
ApplicationEventType.APPLICATION_LOG_HANDLING_INITED),
|
||||||
|
new ApplicationEvent(appAttemptId.getApplicationId(),
|
||||||
|
ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED) };
|
||||||
|
|
||||||
|
checkEvents(appEventHandler, expectedEvents, true, "getType",
|
||||||
|
"getApplicationID");
|
||||||
|
}
|
||||||
|
|
||||||
@Test (timeout = 50000)
|
@Test (timeout = 50000)
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public void testLogAggregationServiceWithPatterns() throws Exception {
|
public void testLogAggregationServiceWithPatterns() throws Exception {
|
||||||
|
|
|
@ -19,15 +19,36 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler;
|
||||||
|
|
||||||
import static org.mockito.Matchers.any;
|
import static org.mockito.Matchers.any;
|
||||||
import static org.mockito.Matchers.eq;
|
import static org.mockito.Matchers.eq;
|
||||||
|
import static org.mockito.Matchers.isA;
|
||||||
|
import static org.mockito.Mockito.doReturn;
|
||||||
import static org.mockito.Mockito.mock;
|
import static org.mockito.Mockito.mock;
|
||||||
|
import static org.mockito.Mockito.spy;
|
||||||
import static org.mockito.Mockito.verify;
|
import static org.mockito.Mockito.verify;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.NotSerializableException;
|
||||||
|
import java.io.ObjectInputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
import java.util.concurrent.ScheduledThreadPoolExecutor;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import org.apache.commons.io.FileUtils;
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.commons.lang.builder.EqualsBuilder;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.AbstractFileSystem;
|
||||||
|
import org.apache.hadoop.fs.FileContext;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||||
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
|
import org.apache.hadoop.security.AccessControlException;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
@ -45,25 +66,52 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.eve
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent;
|
||||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent;
|
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerContainerFinishedEvent;
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
import org.mockito.ArgumentMatcher;
|
||||||
|
import org.mockito.Mockito;
|
||||||
import org.mockito.exceptions.verification.WantedButNotInvoked;
|
import org.mockito.exceptions.verification.WantedButNotInvoked;
|
||||||
|
import org.mockito.internal.matchers.VarargMatcher;
|
||||||
|
|
||||||
public class TestNonAggregatingLogHandler {
|
public class TestNonAggregatingLogHandler {
|
||||||
|
|
||||||
@Test
|
DeletionService mockDelService;
|
||||||
@SuppressWarnings("unchecked")
|
Configuration conf;
|
||||||
public void testLogDeletion() {
|
DrainDispatcher dispatcher;
|
||||||
DeletionService delService = mock(DeletionService.class);
|
EventHandler<ApplicationEvent> appEventHandler;
|
||||||
Configuration conf = new YarnConfiguration();
|
|
||||||
String user = "testuser";
|
String user = "testuser";
|
||||||
|
ApplicationId appId;
|
||||||
|
ApplicationAttemptId appAttemptId;
|
||||||
|
ContainerId container11;
|
||||||
|
LocalDirsHandlerService dirsHandler;
|
||||||
|
|
||||||
File[] localLogDirs = new File[2];
|
@Before
|
||||||
localLogDirs[0] =
|
@SuppressWarnings("unchecked")
|
||||||
new File("target", this.getClass().getName() + "-localLogDir0")
|
public void setup() {
|
||||||
.getAbsoluteFile();
|
mockDelService = mock(DeletionService.class);
|
||||||
localLogDirs[1] =
|
conf = new YarnConfiguration();
|
||||||
new File("target", this.getClass().getName() + "-localLogDir1")
|
dispatcher = createDispatcher(conf);
|
||||||
.getAbsoluteFile();
|
appEventHandler = mock(EventHandler.class);
|
||||||
|
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
||||||
|
appId = BuilderUtils.newApplicationId(1234, 1);
|
||||||
|
appAttemptId = BuilderUtils.newApplicationAttemptId(appId, 1);
|
||||||
|
container11 = BuilderUtils.newContainerId(appAttemptId, 1);
|
||||||
|
dirsHandler = new LocalDirsHandlerService();
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws IOException {
|
||||||
|
dirsHandler.stop();
|
||||||
|
dirsHandler.close();
|
||||||
|
dispatcher.await();
|
||||||
|
dispatcher.stop();
|
||||||
|
dispatcher.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLogDeletion() throws IOException {
|
||||||
|
File[] localLogDirs = getLocalLogDirFiles(this.getClass().getName(), 2);
|
||||||
String localLogDirsString =
|
String localLogDirsString =
|
||||||
localLogDirs[0].getAbsolutePath() + ","
|
localLogDirs[0].getAbsolutePath() + ","
|
||||||
+ localLogDirs[1].getAbsolutePath();
|
+ localLogDirs[1].getAbsolutePath();
|
||||||
|
@ -72,72 +120,50 @@ public class TestNonAggregatingLogHandler {
|
||||||
conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, false);
|
conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, false);
|
||||||
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 0l);
|
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 0l);
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher(conf);
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
|
|
||||||
dirsHandler.init(conf);
|
dirsHandler.init(conf);
|
||||||
|
|
||||||
ApplicationId appId1 = BuilderUtils.newApplicationId(1234, 1);
|
NonAggregatingLogHandler rawLogHandler =
|
||||||
ApplicationAttemptId appAttemptId1 =
|
new NonAggregatingLogHandler(dispatcher, mockDelService, dirsHandler);
|
||||||
BuilderUtils.newApplicationAttemptId(appId1, 1);
|
NonAggregatingLogHandler logHandler = spy(rawLogHandler);
|
||||||
ContainerId container11 = BuilderUtils.newContainerId(appAttemptId1, 1);
|
AbstractFileSystem spylfs =
|
||||||
|
spy(FileContext.getLocalFSFileContext().getDefaultFileSystem());
|
||||||
|
FileContext lfs = FileContext.getFileContext(spylfs, conf);
|
||||||
|
doReturn(lfs).when(logHandler)
|
||||||
|
.getLocalFileContext(isA(Configuration.class));
|
||||||
|
FsPermission defaultPermission =
|
||||||
|
FsPermission.getDirDefault().applyUMask(lfs.getUMask());
|
||||||
|
final FileStatus fs =
|
||||||
|
new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
|
||||||
|
defaultPermission, "", "",
|
||||||
|
new Path(localLogDirs[0].getAbsolutePath()));
|
||||||
|
doReturn(fs).when(spylfs).getFileStatus(isA(Path.class));
|
||||||
|
|
||||||
NonAggregatingLogHandler logHandler =
|
|
||||||
new NonAggregatingLogHandler(dispatcher, delService, dirsHandler);
|
|
||||||
logHandler.init(conf);
|
logHandler.init(conf);
|
||||||
logHandler.start();
|
logHandler.start();
|
||||||
|
|
||||||
logHandler.handle(new LogHandlerAppStartedEvent(appId1, user, null,
|
logHandler.handle(new LogHandlerAppStartedEvent(appId, user, null,
|
||||||
ContainerLogsRetentionPolicy.ALL_CONTAINERS, null));
|
ContainerLogsRetentionPolicy.ALL_CONTAINERS, null));
|
||||||
|
|
||||||
logHandler.handle(new LogHandlerContainerFinishedEvent(container11, 0));
|
logHandler.handle(new LogHandlerContainerFinishedEvent(container11, 0));
|
||||||
|
|
||||||
logHandler.handle(new LogHandlerAppFinishedEvent(appId1));
|
logHandler.handle(new LogHandlerAppFinishedEvent(appId));
|
||||||
|
|
||||||
Path[] localAppLogDirs = new Path[2];
|
Path[] localAppLogDirs = new Path[2];
|
||||||
localAppLogDirs[0] =
|
localAppLogDirs[0] =
|
||||||
new Path(localLogDirs[0].getAbsolutePath(), appId1.toString());
|
new Path(localLogDirs[0].getAbsolutePath(), appId.toString());
|
||||||
localAppLogDirs[1] =
|
localAppLogDirs[1] =
|
||||||
new Path(localLogDirs[1].getAbsolutePath(), appId1.toString());
|
new Path(localLogDirs[1].getAbsolutePath(), appId.toString());
|
||||||
|
|
||||||
// 5 seconds for the delete which is a separate thread.
|
testDeletionServiceCall(mockDelService, user, 5000, localAppLogDirs);
|
||||||
long verifyStartTime = System.currentTimeMillis();
|
logHandler.close();
|
||||||
WantedButNotInvoked notInvokedException = null;
|
for (int i = 0; i < localLogDirs.length; i++) {
|
||||||
boolean matched = false;
|
FileUtils.deleteDirectory(localLogDirs[i]);
|
||||||
while (!matched && System.currentTimeMillis() < verifyStartTime + 5000l) {
|
|
||||||
try {
|
|
||||||
verify(delService).delete(eq(user), (Path) eq(null),
|
|
||||||
eq(localAppLogDirs[0]), eq(localAppLogDirs[1]));
|
|
||||||
matched = true;
|
|
||||||
} catch (WantedButNotInvoked e) {
|
|
||||||
notInvokedException = e;
|
|
||||||
try {
|
|
||||||
Thread.sleep(50l);
|
|
||||||
} catch (InterruptedException i) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!matched) {
|
|
||||||
throw notInvokedException;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("unchecked")
|
public void testDelayedDelete() throws IOException {
|
||||||
public void testDelayedDelete() {
|
File[] localLogDirs = getLocalLogDirFiles(this.getClass().getName(), 2);
|
||||||
DeletionService delService = mock(DeletionService.class);
|
|
||||||
Configuration conf = new YarnConfiguration();
|
|
||||||
String user = "testuser";
|
|
||||||
|
|
||||||
File[] localLogDirs = new File[2];
|
|
||||||
localLogDirs[0] =
|
|
||||||
new File("target", this.getClass().getName() + "-localLogDir0")
|
|
||||||
.getAbsoluteFile();
|
|
||||||
localLogDirs[1] =
|
|
||||||
new File("target", this.getClass().getName() + "-localLogDir1")
|
|
||||||
.getAbsoluteFile();
|
|
||||||
String localLogDirsString =
|
String localLogDirsString =
|
||||||
localLogDirs[0].getAbsolutePath() + ","
|
localLogDirs[0].getAbsolutePath() + ","
|
||||||
+ localLogDirs[1].getAbsolutePath();
|
+ localLogDirs[1].getAbsolutePath();
|
||||||
|
@ -148,42 +174,36 @@ public class TestNonAggregatingLogHandler {
|
||||||
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS,
|
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS,
|
||||||
YarnConfiguration.DEFAULT_NM_LOG_RETAIN_SECONDS);
|
YarnConfiguration.DEFAULT_NM_LOG_RETAIN_SECONDS);
|
||||||
|
|
||||||
DrainDispatcher dispatcher = createDispatcher(conf);
|
|
||||||
EventHandler<ApplicationEvent> appEventHandler = mock(EventHandler.class);
|
|
||||||
dispatcher.register(ApplicationEventType.class, appEventHandler);
|
|
||||||
|
|
||||||
LocalDirsHandlerService dirsHandler = new LocalDirsHandlerService();
|
|
||||||
dirsHandler.init(conf);
|
dirsHandler.init(conf);
|
||||||
|
|
||||||
ApplicationId appId1 = BuilderUtils.newApplicationId(1234, 1);
|
|
||||||
ApplicationAttemptId appAttemptId1 =
|
|
||||||
BuilderUtils.newApplicationAttemptId(appId1, 1);
|
|
||||||
ContainerId container11 = BuilderUtils.newContainerId(appAttemptId1, 1);
|
|
||||||
|
|
||||||
NonAggregatingLogHandler logHandler =
|
NonAggregatingLogHandler logHandler =
|
||||||
new NonAggregatingLogHandlerWithMockExecutor(dispatcher, delService,
|
new NonAggregatingLogHandlerWithMockExecutor(dispatcher, mockDelService,
|
||||||
dirsHandler);
|
dirsHandler);
|
||||||
logHandler.init(conf);
|
logHandler.init(conf);
|
||||||
logHandler.start();
|
logHandler.start();
|
||||||
|
|
||||||
logHandler.handle(new LogHandlerAppStartedEvent(appId1, user, null,
|
logHandler.handle(new LogHandlerAppStartedEvent(appId, user, null,
|
||||||
ContainerLogsRetentionPolicy.ALL_CONTAINERS, null));
|
ContainerLogsRetentionPolicy.ALL_CONTAINERS, null));
|
||||||
|
|
||||||
logHandler.handle(new LogHandlerContainerFinishedEvent(container11, 0));
|
logHandler.handle(new LogHandlerContainerFinishedEvent(container11, 0));
|
||||||
|
|
||||||
logHandler.handle(new LogHandlerAppFinishedEvent(appId1));
|
logHandler.handle(new LogHandlerAppFinishedEvent(appId));
|
||||||
|
|
||||||
Path[] localAppLogDirs = new Path[2];
|
Path[] localAppLogDirs = new Path[2];
|
||||||
localAppLogDirs[0] =
|
localAppLogDirs[0] =
|
||||||
new Path(localLogDirs[0].getAbsolutePath(), appId1.toString());
|
new Path(localLogDirs[0].getAbsolutePath(), appId.toString());
|
||||||
localAppLogDirs[1] =
|
localAppLogDirs[1] =
|
||||||
new Path(localLogDirs[1].getAbsolutePath(), appId1.toString());
|
new Path(localLogDirs[1].getAbsolutePath(), appId.toString());
|
||||||
|
|
||||||
ScheduledThreadPoolExecutor mockSched =
|
ScheduledThreadPoolExecutor mockSched =
|
||||||
((NonAggregatingLogHandlerWithMockExecutor) logHandler).mockSched;
|
((NonAggregatingLogHandlerWithMockExecutor) logHandler).mockSched;
|
||||||
|
|
||||||
verify(mockSched).schedule(any(Runnable.class), eq(10800l),
|
verify(mockSched).schedule(any(Runnable.class), eq(10800l),
|
||||||
eq(TimeUnit.SECONDS));
|
eq(TimeUnit.SECONDS));
|
||||||
|
logHandler.close();
|
||||||
|
for (int i = 0; i < localLogDirs.length; i++) {
|
||||||
|
FileUtils.deleteDirectory(localLogDirs[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@ -202,25 +222,25 @@ public class TestNonAggregatingLogHandler {
|
||||||
verify(logHandler.mockSched)
|
verify(logHandler.mockSched)
|
||||||
.awaitTermination(eq(10l), eq(TimeUnit.SECONDS));
|
.awaitTermination(eq(10l), eq(TimeUnit.SECONDS));
|
||||||
verify(logHandler.mockSched).shutdownNow();
|
verify(logHandler.mockSched).shutdownNow();
|
||||||
|
logHandler.close();
|
||||||
|
aggregatingLogHandler.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testHandlingApplicationFinishedEvent() {
|
public void testHandlingApplicationFinishedEvent() throws IOException {
|
||||||
Configuration conf = new Configuration();
|
|
||||||
LocalDirsHandlerService dirsService = new LocalDirsHandlerService();
|
|
||||||
DeletionService delService = new DeletionService(null);
|
DeletionService delService = new DeletionService(null);
|
||||||
NonAggregatingLogHandler aggregatingLogHandler =
|
NonAggregatingLogHandler aggregatingLogHandler =
|
||||||
new NonAggregatingLogHandler(new InlineDispatcher(),
|
new NonAggregatingLogHandler(new InlineDispatcher(),
|
||||||
delService,
|
delService,
|
||||||
dirsService);
|
dirsHandler);
|
||||||
|
|
||||||
dirsService.init(conf);
|
dirsHandler.init(conf);
|
||||||
dirsService.start();
|
dirsHandler.start();
|
||||||
delService.init(conf);
|
delService.init(conf);
|
||||||
delService.start();
|
delService.start();
|
||||||
aggregatingLogHandler.init(conf);
|
aggregatingLogHandler.init(conf);
|
||||||
aggregatingLogHandler.start();
|
aggregatingLogHandler.start();
|
||||||
ApplicationId appId = BuilderUtils.newApplicationId(1234, 1);
|
|
||||||
// It should NOT throw RejectedExecutionException
|
// It should NOT throw RejectedExecutionException
|
||||||
aggregatingLogHandler.handle(new LogHandlerAppFinishedEvent(appId));
|
aggregatingLogHandler.handle(new LogHandlerAppFinishedEvent(appId));
|
||||||
aggregatingLogHandler.stop();
|
aggregatingLogHandler.stop();
|
||||||
|
@ -228,6 +248,7 @@ public class TestNonAggregatingLogHandler {
|
||||||
// It should NOT throw RejectedExecutionException after stopping
|
// It should NOT throw RejectedExecutionException after stopping
|
||||||
// handler service.
|
// handler service.
|
||||||
aggregatingLogHandler.handle(new LogHandlerAppFinishedEvent(appId));
|
aggregatingLogHandler.handle(new LogHandlerAppFinishedEvent(appId));
|
||||||
|
aggregatingLogHandler.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private class NonAggregatingLogHandlerWithMockExecutor extends
|
private class NonAggregatingLogHandlerWithMockExecutor extends
|
||||||
|
@ -255,4 +276,201 @@ public class TestNonAggregatingLogHandler {
|
||||||
dispatcher.start();
|
dispatcher.start();
|
||||||
return dispatcher;
|
return dispatcher;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test to ensure that we handle the cleanup of directories that may not have
|
||||||
|
* the application log dirs we're trying to delete or may have other problems.
|
||||||
|
* Test creates 7 log dirs, and fails the directory check for 4 of them and
|
||||||
|
* then checks to ensure we tried to delete only the ones that passed the
|
||||||
|
* check.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFailedDirLogDeletion() throws Exception {
|
||||||
|
|
||||||
|
File[] localLogDirs = getLocalLogDirFiles(this.getClass().getName(), 7);
|
||||||
|
final List<String> localLogDirPaths =
|
||||||
|
new ArrayList<String>(localLogDirs.length);
|
||||||
|
for (int i = 0; i < localLogDirs.length; i++) {
|
||||||
|
localLogDirPaths.add(localLogDirs[i].getAbsolutePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
String localLogDirsString = StringUtils.join(localLogDirPaths, ",");
|
||||||
|
|
||||||
|
conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDirsString);
|
||||||
|
conf.setBoolean(YarnConfiguration.LOG_AGGREGATION_ENABLED, false);
|
||||||
|
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 0l);
|
||||||
|
|
||||||
|
LocalDirsHandlerService mockDirsHandler = mock(LocalDirsHandlerService.class);
|
||||||
|
|
||||||
|
NonAggregatingLogHandler rawLogHandler =
|
||||||
|
new NonAggregatingLogHandler(dispatcher, mockDelService, mockDirsHandler);
|
||||||
|
NonAggregatingLogHandler logHandler = spy(rawLogHandler);
|
||||||
|
AbstractFileSystem spylfs =
|
||||||
|
spy(FileContext.getLocalFSFileContext().getDefaultFileSystem());
|
||||||
|
FileContext lfs = FileContext.getFileContext(spylfs, conf);
|
||||||
|
doReturn(lfs).when(logHandler)
|
||||||
|
.getLocalFileContext(isA(Configuration.class));
|
||||||
|
logHandler.init(conf);
|
||||||
|
logHandler.start();
|
||||||
|
runMockedFailedDirs(logHandler, appId, user, mockDelService,
|
||||||
|
mockDirsHandler, conf, spylfs, lfs, localLogDirs);
|
||||||
|
logHandler.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function to run a log handler with directories failing the getFileStatus
|
||||||
|
* call. The function accepts the log handler, setup the mocks to fail with
|
||||||
|
* specific exceptions and ensures the deletion service has the correct calls.
|
||||||
|
*
|
||||||
|
* @param logHandler the logHandler implementation to test
|
||||||
|
*
|
||||||
|
* @param appId the application id that we wish when sending events to the log
|
||||||
|
* handler
|
||||||
|
*
|
||||||
|
* @param user the user name to use
|
||||||
|
*
|
||||||
|
* @param mockDelService a mock of the DeletionService which we will verify
|
||||||
|
* the delete calls against
|
||||||
|
*
|
||||||
|
* @param dirsHandler a spy or mock on the LocalDirsHandler service used to
|
||||||
|
* when creating the logHandler. It needs to be a spy so that we can intercept
|
||||||
|
* the getAllLogDirs() call.
|
||||||
|
*
|
||||||
|
* @param conf the configuration used
|
||||||
|
*
|
||||||
|
* @param spylfs a spy on the AbstractFileSystem object used when creating lfs
|
||||||
|
*
|
||||||
|
* @param lfs the FileContext object to be used to mock the getFileStatus()
|
||||||
|
* calls
|
||||||
|
*
|
||||||
|
* @param localLogDirs list of the log dirs to run the test against, must have
|
||||||
|
* at least 7 entries
|
||||||
|
*/
|
||||||
|
public static void runMockedFailedDirs(LogHandler logHandler,
|
||||||
|
ApplicationId appId, String user, DeletionService mockDelService,
|
||||||
|
LocalDirsHandlerService dirsHandler, Configuration conf,
|
||||||
|
AbstractFileSystem spylfs, FileContext lfs, File[] localLogDirs)
|
||||||
|
throws Exception {
|
||||||
|
Map<ApplicationAccessType, String> appAcls = new HashMap<ApplicationAccessType, String>();
|
||||||
|
if (localLogDirs.length < 7) {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"Argument localLogDirs must be at least of length 7");
|
||||||
|
}
|
||||||
|
Path[] localAppLogDirPaths = new Path[localLogDirs.length];
|
||||||
|
for (int i = 0; i < localAppLogDirPaths.length; i++) {
|
||||||
|
localAppLogDirPaths[i] =
|
||||||
|
new Path(localLogDirs[i].getAbsolutePath(), appId.toString());
|
||||||
|
}
|
||||||
|
final List<String> localLogDirPaths =
|
||||||
|
new ArrayList<String>(localLogDirs.length);
|
||||||
|
for (int i = 0; i < localLogDirs.length; i++) {
|
||||||
|
localLogDirPaths.add(localLogDirs[i].getAbsolutePath());
|
||||||
|
}
|
||||||
|
|
||||||
|
// setup mocks
|
||||||
|
FsPermission defaultPermission =
|
||||||
|
FsPermission.getDirDefault().applyUMask(lfs.getUMask());
|
||||||
|
final FileStatus fs =
|
||||||
|
new FileStatus(0, true, 1, 0, System.currentTimeMillis(), 0,
|
||||||
|
defaultPermission, "", "",
|
||||||
|
new Path(localLogDirs[0].getAbsolutePath()));
|
||||||
|
doReturn(fs).when(spylfs).getFileStatus(isA(Path.class));
|
||||||
|
doReturn(localLogDirPaths).when(dirsHandler).getLogDirsForCleanup();
|
||||||
|
|
||||||
|
logHandler.handle(new LogHandlerAppStartedEvent(appId, user, null,
|
||||||
|
ContainerLogsRetentionPolicy.ALL_CONTAINERS, appAcls));
|
||||||
|
|
||||||
|
// test case where some dirs have the log dir to delete
|
||||||
|
// mock some dirs throwing various exceptions
|
||||||
|
// verify deletion happens only on the others
|
||||||
|
Mockito.doThrow(new FileNotFoundException()).when(spylfs)
|
||||||
|
.getFileStatus(eq(localAppLogDirPaths[0]));
|
||||||
|
doReturn(fs).when(spylfs).getFileStatus(eq(localAppLogDirPaths[1]));
|
||||||
|
Mockito.doThrow(new AccessControlException()).when(spylfs)
|
||||||
|
.getFileStatus(eq(localAppLogDirPaths[2]));
|
||||||
|
doReturn(fs).when(spylfs).getFileStatus(eq(localAppLogDirPaths[3]));
|
||||||
|
Mockito.doThrow(new IOException()).when(spylfs)
|
||||||
|
.getFileStatus(eq(localAppLogDirPaths[4]));
|
||||||
|
Mockito.doThrow(new UnsupportedFileSystemException("test")).when(spylfs)
|
||||||
|
.getFileStatus(eq(localAppLogDirPaths[5]));
|
||||||
|
doReturn(fs).when(spylfs).getFileStatus(eq(localAppLogDirPaths[6]));
|
||||||
|
|
||||||
|
logHandler.handle(new LogHandlerAppFinishedEvent(appId));
|
||||||
|
|
||||||
|
testDeletionServiceCall(mockDelService, user, 5000, localAppLogDirPaths[1],
|
||||||
|
localAppLogDirPaths[3], localAppLogDirPaths[6]);
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
static class DeletePathsMatcher extends ArgumentMatcher<Path[]> implements
|
||||||
|
VarargMatcher {
|
||||||
|
|
||||||
|
// to get rid of serialization warning
|
||||||
|
static final long serialVersionUID = 0;
|
||||||
|
|
||||||
|
private transient Path[] matchPaths;
|
||||||
|
|
||||||
|
DeletePathsMatcher(Path... matchPaths) {
|
||||||
|
this.matchPaths = matchPaths;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean matches(Object varargs) {
|
||||||
|
return new EqualsBuilder().append(matchPaths, varargs).isEquals();
|
||||||
|
}
|
||||||
|
|
||||||
|
// function to get rid of FindBugs warning
|
||||||
|
private void readObject(ObjectInputStream os) throws NotSerializableException {
|
||||||
|
throw new NotSerializableException(this.getClass().getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Function to verify that the DeletionService object received the right
|
||||||
|
* requests.
|
||||||
|
*
|
||||||
|
* @param delService the DeletionService mock which we verify against
|
||||||
|
*
|
||||||
|
* @param user the user name to use when verifying the deletion
|
||||||
|
*
|
||||||
|
* @param timeout amount in milliseconds to wait before we decide the calls
|
||||||
|
* didn't come through
|
||||||
|
*
|
||||||
|
* @param matchPaths the paths to match in the delete calls
|
||||||
|
*
|
||||||
|
* @throws WantedButNotInvoked if the calls could not be verified
|
||||||
|
*/
|
||||||
|
static void testDeletionServiceCall(DeletionService delService, String user,
|
||||||
|
long timeout, Path... matchPaths) {
|
||||||
|
|
||||||
|
long verifyStartTime = System.currentTimeMillis();
|
||||||
|
WantedButNotInvoked notInvokedException = null;
|
||||||
|
boolean matched = false;
|
||||||
|
while (!matched && System.currentTimeMillis() < verifyStartTime + timeout) {
|
||||||
|
try {
|
||||||
|
verify(delService).delete(eq(user), (Path) eq(null),
|
||||||
|
Mockito.argThat(new DeletePathsMatcher(matchPaths)));
|
||||||
|
matched = true;
|
||||||
|
} catch (WantedButNotInvoked e) {
|
||||||
|
notInvokedException = e;
|
||||||
|
try {
|
||||||
|
Thread.sleep(50l);
|
||||||
|
} catch (InterruptedException i) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!matched) {
|
||||||
|
throw notInvokedException;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static File[] getLocalLogDirFiles(String name, int number) {
|
||||||
|
File[] dirs = new File[number];
|
||||||
|
for (int i = 0; i < dirs.length; i++) {
|
||||||
|
dirs[i] = new File("target", name + "-localLogDir" + i).getAbsoluteFile();
|
||||||
|
}
|
||||||
|
return dirs;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue