MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is bad (Jason Lowe via bobby)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1367783 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Joseph Evans 2012-07-31 21:08:40 +00:00
parent 9d42fb2e8e
commit 123c4f57d3
3 changed files with 59 additions and 21 deletions

View File

@ -463,6 +463,9 @@ Release 2.0.0-alpha - 05-23-2012
MAPREDUCE-4483. 2.0 build does not work (John George via bobby)
MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is
bad (Jason Lowe via bobby)
Release 0.23.3 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -93,23 +93,7 @@ public class LocalDirsHandlerService extends AbstractService {
@Override
public void run() {
  // Perform exactly one health-check pass per timer tick. The checking,
  // logging and timestamp bookkeeping all live in checkDirs(); repeating
  // that logic inline here would check every directory twice per run.
  checkDirs();
}
}
@ -135,6 +119,10 @@ public class LocalDirsHandlerService extends AbstractService {
YarnConfiguration.DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION);
lastDisksCheckTime = System.currentTimeMillis();
super.init(conf);
// Check the disk health immediately to weed out bad directories
// before other init code attempts to use them.
checkDirs();
}
/**
@ -144,10 +132,8 @@ public class LocalDirsHandlerService extends AbstractService {
public void start() {
  if (isDiskHealthCheckerEnabled) {
    dirsHandlerScheduler = new Timer("DiskHealthMonitor-Timer", true);
    // Schedule the monitoring task exactly once: a TimerTask that has
    // already been scheduled cannot be scheduled again (the Timer throws
    // IllegalStateException). The first run is delayed by one full
    // interval because init() already performs an immediate check.
    dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask,
        diskHealthCheckInterval, diskHealthCheckInterval);
  }
  super.start();
}
@ -253,6 +239,26 @@ public class LocalDirsHandlerService extends AbstractService {
logDirs.toArray(new String[logDirs.size()]));
}
/**
 * Runs one health-check pass over the local and log directories and
 * records the time at which the pass completed.
 *
 * Both directory sets are always checked. If either check returns true
 * (treated here as a newly detected failure), the disk health report is
 * logged, updateDirsInConfiguration() is called, and an additional error
 * is logged when areDisksHealthy() returns false.
 */
private void checkDirs() {
  // Evaluate both checks up front so neither is skipped by short-circuiting.
  boolean localDirsFailed = localDirs.checkDirs();
  boolean logDirsFailed = logDirs.checkDirs();

  if (localDirsFailed || logDirsFailed) {
    LOG.info("Disk(s) failed. " + getDisksHealthReport());
    updateDirsInConfiguration();

    boolean disksHealthy = areDisksHealthy();
    if (!disksHealthy) {
      // Only report the condition; no further action is taken here.
      LOG.error("Most of the disks failed. " + getDisksHealthReport());
    }
  }

  lastDisksCheckTime = System.currentTimeMillis();
}
/**
 * Asks the local-dirs allocator for a path suitable for writing.
 *
 * @param pathStr the path string handed to the allocator
 * @return the path chosen by localDirsAllocator for the current config
 * @throws IOException if the allocator fails to provide a path
 */
public Path getLocalPathForWrite(String pathStr) throws IOException {
  Path writablePath =
      localDirsAllocator.getLocalPathForWrite(pathStr, getConfig());
  return writablePath;
}

View File

@ -110,6 +110,35 @@ public class TestDiskFailures {
testDirsFailures(false);
}
/**
 * Verifies that directories which are already unusable when the service
 * is initialized are dropped from the lists it reports.
 *
 * One of two configured local directories and one of two configured log
 * directories are prepared to fail before init(); afterwards only the
 * remaining directory of each kind is expected to be returned.
 *
 * @throws IOException declared for the directory preparation helper
 */
@Test
public void testDirFailuresOnStartup() throws IOException {
  Configuration conf = new YarnConfiguration();

  // Local dirs: the first entry is made to fail, the second stays usable.
  String badLocalDir = new File(testDir, "localDir1").getPath();
  String goodLocalDir = new File(testDir, "localDir2").getPath();
  // Log dirs: the first entry stays usable, the second is made to fail.
  String goodLogDir = new File(testDir, "logDir1").getPath();
  String badLogDir = new File(testDir, "logDir2").getPath();

  String localDirsValue = badLocalDir + "," + goodLocalDir;
  String logDirsValue = goodLogDir + "," + badLogDir;
  conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDirsValue);
  conf.set(YarnConfiguration.NM_LOG_DIRS, logDirsValue);

  prepareDirToFail(badLocalDir);
  prepareDirToFail(badLogDir);

  LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
  dirSvc.init(conf);

  List<String> remainingLocalDirs = dirSvc.getLocalDirs();
  Assert.assertEquals(1, remainingLocalDirs.size());
  Assert.assertEquals(goodLocalDir, remainingLocalDirs.get(0));

  List<String> remainingLogDirs = dirSvc.getLogDirs();
  Assert.assertEquals(1, remainingLogDirs.size());
  Assert.assertEquals(goodLogDir, remainingLogDirs.get(0));
}
private void testDirsFailures(boolean localORLogDirs) throws IOException {
String dirType = localORLogDirs ? "local" : "log";
String dirsProperty = localORLogDirs ? YarnConfiguration.NM_LOCAL_DIRS