MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is bad (Jason Lowe via bobby)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1367783 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9d42fb2e8e
commit
123c4f57d3
|
@ -463,6 +463,9 @@ Release 2.0.0-alpha - 05-23-2012
|
|||
|
||||
MAPREDUCE-4483. 2.0 build does not work (John George via bobby)
|
||||
|
||||
MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is
|
||||
bad (Jason Lowe via bobby)
|
||||
|
||||
Release 0.23.3 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -93,23 +93,7 @@ public class LocalDirsHandlerService extends AbstractService {
|
|||
|
||||
@Override
|
||||
public void run() {
|
||||
boolean newFailure = false;
|
||||
if (localDirs.checkDirs()) {
|
||||
newFailure = true;
|
||||
}
|
||||
if (logDirs.checkDirs()) {
|
||||
newFailure = true;
|
||||
}
|
||||
|
||||
if (newFailure) {
|
||||
LOG.info("Disk(s) failed. " + getDisksHealthReport());
|
||||
updateDirsInConfiguration();
|
||||
if (!areDisksHealthy()) {
|
||||
// Just log.
|
||||
LOG.error("Most of the disks failed. " + getDisksHealthReport());
|
||||
}
|
||||
}
|
||||
lastDisksCheckTime = System.currentTimeMillis();
|
||||
checkDirs();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -135,6 +119,10 @@ public class LocalDirsHandlerService extends AbstractService {
|
|||
YarnConfiguration.DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION);
|
||||
lastDisksCheckTime = System.currentTimeMillis();
|
||||
super.init(conf);
|
||||
|
||||
// Check the disk health immediately to weed out bad directories
|
||||
// before other init code attempts to use them.
|
||||
checkDirs();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -144,10 +132,8 @@ public class LocalDirsHandlerService extends AbstractService {
|
|||
public void start() {
|
||||
if (isDiskHealthCheckerEnabled) {
|
||||
dirsHandlerScheduler = new Timer("DiskHealthMonitor-Timer", true);
|
||||
// Start the timer task for disk health checking immediately and
|
||||
// then run periodically at interval time.
|
||||
dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask, 0,
|
||||
diskHealthCheckInterval);
|
||||
dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask,
|
||||
diskHealthCheckInterval, diskHealthCheckInterval);
|
||||
}
|
||||
super.start();
|
||||
}
|
||||
|
@ -253,6 +239,26 @@ public class LocalDirsHandlerService extends AbstractService {
|
|||
logDirs.toArray(new String[logDirs.size()]));
|
||||
}
|
||||
|
||||
private void checkDirs() {
|
||||
boolean newFailure = false;
|
||||
if (localDirs.checkDirs()) {
|
||||
newFailure = true;
|
||||
}
|
||||
if (logDirs.checkDirs()) {
|
||||
newFailure = true;
|
||||
}
|
||||
|
||||
if (newFailure) {
|
||||
LOG.info("Disk(s) failed. " + getDisksHealthReport());
|
||||
updateDirsInConfiguration();
|
||||
if (!areDisksHealthy()) {
|
||||
// Just log.
|
||||
LOG.error("Most of the disks failed. " + getDisksHealthReport());
|
||||
}
|
||||
}
|
||||
lastDisksCheckTime = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
public Path getLocalPathForWrite(String pathStr) throws IOException {
|
||||
return localDirsAllocator.getLocalPathForWrite(pathStr, getConfig());
|
||||
}
|
||||
|
|
|
@ -110,6 +110,35 @@ public class TestDiskFailures {
|
|||
testDirsFailures(false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Make a local and log directory inaccessible during initialization
|
||||
* and verify those bad directories are recognized and removed from
|
||||
* the list of available local and log directories.
|
||||
* @throws IOException
|
||||
*/
|
||||
@Test
|
||||
public void testDirFailuresOnStartup() throws IOException {
|
||||
Configuration conf = new YarnConfiguration();
|
||||
String localDir1 = new File(testDir, "localDir1").getPath();
|
||||
String localDir2 = new File(testDir, "localDir2").getPath();
|
||||
String logDir1 = new File(testDir, "logDir1").getPath();
|
||||
String logDir2 = new File(testDir, "logDir2").getPath();
|
||||
conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir1 + "," + localDir2);
|
||||
conf.set(YarnConfiguration.NM_LOG_DIRS, logDir1 + "," + logDir2);
|
||||
|
||||
prepareDirToFail(localDir1);
|
||||
prepareDirToFail(logDir2);
|
||||
|
||||
LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
|
||||
dirSvc.init(conf);
|
||||
List<String> localDirs = dirSvc.getLocalDirs();
|
||||
Assert.assertEquals(1, localDirs.size());
|
||||
Assert.assertEquals(localDir2, localDirs.get(0));
|
||||
List<String> logDirs = dirSvc.getLogDirs();
|
||||
Assert.assertEquals(1, logDirs.size());
|
||||
Assert.assertEquals(logDir1, logDirs.get(0));
|
||||
}
|
||||
|
||||
private void testDirsFailures(boolean localORLogDirs) throws IOException {
|
||||
String dirType = localORLogDirs ? "local" : "log";
|
||||
String dirsProperty = localORLogDirs ? YarnConfiguration.NM_LOCAL_DIRS
|
||||
|
|
Loading…
Reference in New Issue