MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is bad (Jason Lowe via bobby)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1367783 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 9d42fb2e8e
commit 123c4f57d3
|
@ -463,6 +463,9 @@ Release 2.0.0-alpha - 05-23-2012
|
||||||
|
|
||||||
MAPREDUCE-4483. 2.0 build does not work (John George via bobby)
|
MAPREDUCE-4483. 2.0 build does not work (John George via bobby)
|
||||||
|
|
||||||
|
MAPREDUCE-4444. nodemanager fails to start when one of the local-dirs is
|
||||||
|
bad (Jason Lowe via bobby)
|
||||||
|
|
||||||
Release 0.23.3 - UNRELEASED
|
Release 0.23.3 - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -93,23 +93,7 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
boolean newFailure = false;
|
checkDirs();
|
||||||
if (localDirs.checkDirs()) {
|
|
||||||
newFailure = true;
|
|
||||||
}
|
|
||||||
if (logDirs.checkDirs()) {
|
|
||||||
newFailure = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (newFailure) {
|
|
||||||
LOG.info("Disk(s) failed. " + getDisksHealthReport());
|
|
||||||
updateDirsInConfiguration();
|
|
||||||
if (!areDisksHealthy()) {
|
|
||||||
// Just log.
|
|
||||||
LOG.error("Most of the disks failed. " + getDisksHealthReport());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
lastDisksCheckTime = System.currentTimeMillis();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -135,6 +119,10 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
YarnConfiguration.DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION);
|
YarnConfiguration.DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION);
|
||||||
lastDisksCheckTime = System.currentTimeMillis();
|
lastDisksCheckTime = System.currentTimeMillis();
|
||||||
super.init(conf);
|
super.init(conf);
|
||||||
|
|
||||||
|
// Check the disk health immediately to weed out bad directories
|
||||||
|
// before other init code attempts to use them.
|
||||||
|
checkDirs();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -144,10 +132,8 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
public void start() {
|
public void start() {
|
||||||
if (isDiskHealthCheckerEnabled) {
|
if (isDiskHealthCheckerEnabled) {
|
||||||
dirsHandlerScheduler = new Timer("DiskHealthMonitor-Timer", true);
|
dirsHandlerScheduler = new Timer("DiskHealthMonitor-Timer", true);
|
||||||
// Start the timer task for disk health checking immediately and
|
dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask,
|
||||||
// then run periodically at interval time.
|
diskHealthCheckInterval, diskHealthCheckInterval);
|
||||||
dirsHandlerScheduler.scheduleAtFixedRate(monitoringTimerTask, 0,
|
|
||||||
diskHealthCheckInterval);
|
|
||||||
}
|
}
|
||||||
super.start();
|
super.start();
|
||||||
}
|
}
|
||||||
|
@ -253,6 +239,26 @@ public class LocalDirsHandlerService extends AbstractService {
|
||||||
logDirs.toArray(new String[logDirs.size()]));
|
logDirs.toArray(new String[logDirs.size()]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkDirs() {
|
||||||
|
boolean newFailure = false;
|
||||||
|
if (localDirs.checkDirs()) {
|
||||||
|
newFailure = true;
|
||||||
|
}
|
||||||
|
if (logDirs.checkDirs()) {
|
||||||
|
newFailure = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (newFailure) {
|
||||||
|
LOG.info("Disk(s) failed. " + getDisksHealthReport());
|
||||||
|
updateDirsInConfiguration();
|
||||||
|
if (!areDisksHealthy()) {
|
||||||
|
// Just log.
|
||||||
|
LOG.error("Most of the disks failed. " + getDisksHealthReport());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
lastDisksCheckTime = System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
|
||||||
public Path getLocalPathForWrite(String pathStr) throws IOException {
|
public Path getLocalPathForWrite(String pathStr) throws IOException {
|
||||||
return localDirsAllocator.getLocalPathForWrite(pathStr, getConfig());
|
return localDirsAllocator.getLocalPathForWrite(pathStr, getConfig());
|
||||||
}
|
}
|
||||||
|
|
|
@ -110,6 +110,35 @@ public class TestDiskFailures {
|
||||||
testDirsFailures(false);
|
testDirsFailures(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make a local and log directory inaccessible during initialization
|
||||||
|
* and verify those bad directories are recognized and removed from
|
||||||
|
* the list of available local and log directories.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDirFailuresOnStartup() throws IOException {
|
||||||
|
Configuration conf = new YarnConfiguration();
|
||||||
|
String localDir1 = new File(testDir, "localDir1").getPath();
|
||||||
|
String localDir2 = new File(testDir, "localDir2").getPath();
|
||||||
|
String logDir1 = new File(testDir, "logDir1").getPath();
|
||||||
|
String logDir2 = new File(testDir, "logDir2").getPath();
|
||||||
|
conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir1 + "," + localDir2);
|
||||||
|
conf.set(YarnConfiguration.NM_LOG_DIRS, logDir1 + "," + logDir2);
|
||||||
|
|
||||||
|
prepareDirToFail(localDir1);
|
||||||
|
prepareDirToFail(logDir2);
|
||||||
|
|
||||||
|
LocalDirsHandlerService dirSvc = new LocalDirsHandlerService();
|
||||||
|
dirSvc.init(conf);
|
||||||
|
List<String> localDirs = dirSvc.getLocalDirs();
|
||||||
|
Assert.assertEquals(1, localDirs.size());
|
||||||
|
Assert.assertEquals(localDir2, localDirs.get(0));
|
||||||
|
List<String> logDirs = dirSvc.getLogDirs();
|
||||||
|
Assert.assertEquals(1, logDirs.size());
|
||||||
|
Assert.assertEquals(logDir1, logDirs.get(0));
|
||||||
|
}
|
||||||
|
|
||||||
private void testDirsFailures(boolean localORLogDirs) throws IOException {
|
private void testDirsFailures(boolean localORLogDirs) throws IOException {
|
||||||
String dirType = localORLogDirs ? "local" : "log";
|
String dirType = localORLogDirs ? "local" : "log";
|
||||||
String dirsProperty = localORLogDirs ? YarnConfiguration.NM_LOCAL_DIRS
|
String dirsProperty = localORLogDirs ? YarnConfiguration.NM_LOCAL_DIRS
|
||||||
|
|
Loading…
Reference in New Issue