diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1f4d8519241..98dc3546e0d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -159,6 +159,9 @@ Release 2.6.1 - UNRELEASED YARN-3832. Resource Localization fails on a cluster due to existing cache directories (Brahma Reddy Battula via jlowe) + YARN-3850. NM fails to read files from full disks which can lead to + container logs being lost and other issues (Varun Saxena via jlowe) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java index 7d1aa534bef..d6950a37358 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java @@ -212,6 +212,18 @@ public class LocalDirsHandlerService extends AbstractService { return logDirs.getFullDirs(); } + /** + * Function to get the local dirs which should be considered for reading + * existing files on disk. Contains the good local dirs and the local dirs + * that have reached the disk space limit + * + * @return the local dirs which should be considered for reading + */ + public List getLocalDirsForRead() { + return DirectoryCollection.concat(localDirs.getGoodDirs(), + localDirs.getFullDirs()); + } + /** * Function to get the local dirs which should be considered when cleaning up * resources. Contains the good local dirs and the local dirs that have reached @@ -224,6 +236,18 @@ public class LocalDirsHandlerService extends AbstractService { localDirs.getFullDirs()); } + /** + * Function to get the log dirs which should be considered for reading + * existing files on disk. Contains the good log dirs and the log dirs that + * have reached the disk space limit + * + * @return the log dirs which should be considered for reading + */ + public List getLogDirsForRead() { + return DirectoryCollection.concat(logDirs.getGoodDirs(), + logDirs.getFullDirs()); + } + /** * Function to get the log dirs which should be considered when cleaning up * resources. Contains the good log dirs and the log dirs that have reached diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java index 03a39aa5ab6..c662ecdf984 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/launcher/RecoveredContainerLaunch.java @@ -121,7 +121,8 @@ public class RecoveredContainerLaunch extends ContainerLaunch { private File locatePidFile(String appIdStr, String containerIdStr) { String pidSubpath= getPidFileSubpath(appIdStr, containerIdStr); - for (String dir : getContext().getLocalDirsHandler().getLocalDirs()) { + for (String dir : getContext().getLocalDirsHandler(). + getLocalDirsForRead()) { File pidFile = new File(dir, pidSubpath); if (pidFile.exists()) { return pidFile; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java index 20887b66483..65c55014daa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java @@ -524,10 +524,10 @@ public class AppLogAggregatorImpl implements AppLogAggregator { public Set doContainerLogAggregation(LogWriter writer) { LOG.info("Uploading logs for container " + containerId + ". Current good log dirs are " - + StringUtils.join(",", dirsHandler.getLogDirs())); + + StringUtils.join(",", dirsHandler.getLogDirsForRead())); final LogKey logKey = new LogKey(containerId); final LogValue logValue = - new LogValue(dirsHandler.getLogDirs(), containerId, + new LogValue(dirsHandler.getLogDirsForRead(), containerId, userUgi.getShortUserName(), logAggregationContext, this.uploadedFileMeta); try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java index c588a89ffe8..319f49be5a6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/webapp/ContainerLogsUtils.java @@ -74,7 +74,7 @@ public class ContainerLogsUtils { static List getContainerLogDirs(ContainerId containerId, LocalDirsHandlerService dirsHandler) throws YarnException { - List logDirs = dirsHandler.getLogDirs(); + List logDirs = dirsHandler.getLogDirsForRead(); List containerLogDirs = new ArrayList(logDirs.size()); for (String logDir : logDirs) { logDir = new File(logDir).toURI().getPath(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 7d911e99c58..e2c45db8497 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -172,22 +172,11 @@ public class TestLogAggregationService extends BaseContainerManagerTest { dispatcher.close(); } - @Test - public void testLocalFileDeletionAfterUpload() throws Exception { - this.delSrvc = new DeletionService(createContainerExecutor()); - delSrvc = spy(delSrvc); - this.delSrvc.init(conf); - this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); - this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, - this.remoteRootLogDir.getAbsolutePath()); - - LogAggregationService logAggregationService = spy( - new LogAggregationService(dispatcher, this.context, this.delSrvc, - super.dirsHandler)); + private void verifyLocalFileDeletion( + LogAggregationService logAggregationService) throws Exception { logAggregationService.init(this.conf); logAggregationService.start(); - ApplicationId application1 = BuilderUtils.newApplicationId(1234, 1); // AppLogDir should be created @@ -247,10 +236,47 @@ public class TestLogAggregationService extends BaseContainerManagerTest { ApplicationEventType.APPLICATION_LOG_HANDLING_FINISHED) }; - checkEvents(appEventHandler, expectedEvents, true, "getType", "getApplicationID"); + checkEvents(appEventHandler, expectedEvents, true, "getType", + "getApplicationID"); dispatcher.stop(); } + @Test + public void testLocalFileDeletionAfterUpload() throws Exception { + this.delSrvc = new DeletionService(createContainerExecutor()); + delSrvc = spy(delSrvc); + this.delSrvc.init(conf); + this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); + this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + this.remoteRootLogDir.getAbsolutePath()); + + LogAggregationService logAggregationService = spy( + new LogAggregationService(dispatcher, this.context, this.delSrvc, + super.dirsHandler)); + verifyLocalFileDeletion(logAggregationService); + } + + @Test + public void testLocalFileDeletionOnDiskFull() throws Exception { + this.delSrvc = new DeletionService(createContainerExecutor()); + delSrvc = spy(delSrvc); + this.delSrvc.init(conf); + this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); + this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, + this.remoteRootLogDir.getAbsolutePath()); + List logDirs = super.dirsHandler.getLogDirs(); + LocalDirsHandlerService dirsHandler = spy(super.dirsHandler); + // Simulate disk being full by returning no good log dirs but having a + // directory in full log dirs. + when(dirsHandler.getLogDirs()).thenReturn(new ArrayList()); + when(dirsHandler.getLogDirsForRead()).thenReturn(logDirs); + LogAggregationService logAggregationService = spy( + new LogAggregationService(dispatcher, this.context, this.delSrvc, + dirsHandler)); + verifyLocalFileDeletion(logAggregationService); + } + + @Test public void testNoContainerOnNode() throws Exception { this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java index b1d439779ac..39c52d9dc7b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/TestContainerLogsPage.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.nodemanager.webapp; import static org.junit.Assume.assumeTrue; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; import static org.mockito.Mockito.verify; @@ -29,6 +30,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -115,6 +117,24 @@ public class TestContainerLogsPage { Assert.assertNull(nmContext.getContainers().get(container1)); files = ContainerLogsUtils.getContainerLogDirs(container1, user, nmContext); Assert.assertTrue(!(files.get(0).toString().contains("file:"))); + + // Create a new context to check if correct container log dirs are fetched + // on full disk. + LocalDirsHandlerService dirsHandlerForFullDisk = spy(dirsHandler); + // good log dirs are empty and nm log dir is in the full log dir list. + when(dirsHandlerForFullDisk.getLogDirs()). + thenReturn(new ArrayList()); + when(dirsHandlerForFullDisk.getLogDirsForRead()). + thenReturn(Arrays.asList(new String[] {absLogDir.getAbsolutePath()})); + nmContext = new NodeManager.NMContext(null, null, dirsHandlerForFullDisk, + new ApplicationACLsManager(conf), new NMNullStateStoreService()); + nmContext.getApplications().put(appId, app); + container.setState(ContainerState.RUNNING); + nmContext.getContainers().put(container1, container); + List dirs = + ContainerLogsUtils.getContainerLogDirs(container1, user, nmContext); + File containerLogDir = new File(absLogDir, appId + "/" + container1); + Assert.assertTrue(dirs.contains(containerLogDir)); } @Test(timeout = 10000) @@ -224,7 +244,7 @@ public class TestContainerLogsPage { LocalDirsHandlerService localDirs = mock(LocalDirsHandlerService.class); List logDirs = new ArrayList(); logDirs.add("F:/nmlogs"); - when(localDirs.getLogDirs()).thenReturn(logDirs); + when(localDirs.getLogDirsForRead()).thenReturn(logDirs); ApplicationIdPBImpl appId = mock(ApplicationIdPBImpl.class); when(appId.toString()).thenReturn("app_id_1");