YARN-6708. Nodemanager container crash after ext3 folder limit. Contributed by Bibin A Chundatt

This commit is contained in:
Jason Lowe 2017-07-06 09:40:09 -05:00
parent 946dd25675
commit 7576a688ea
2 changed files with 71 additions and 9 deletions

View File

@ -31,6 +31,7 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletionService;
@ -60,6 +61,7 @@ import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.concurrent.HadoopExecutors;
import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.SerializedException;
import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
@ -95,6 +97,8 @@ public class ContainerLocalizer {
private static final String USERCACHE_CTXT_FMT = "%s.user.cache.dirs";
private static final FsPermission FILECACHE_PERMS =
new FsPermission((short)0710);
private static final FsPermission USERCACHE_FOLDER_PERMS =
new FsPermission((short) 0755);
private final String user;
private final String appId;
@ -237,10 +241,29 @@ public class ContainerLocalizer {
}
Callable<Path> download(Path path, LocalResource rsrc,
Callable<Path> download(Path destDirPath, LocalResource rsrc,
UserGroupInformation ugi) throws IOException {
diskValidator.checkStatus(new File(path.toUri().getRawPath()));
return new FSDownloadWrapper(lfs, ugi, conf, path, rsrc);
// For private localization FsDownload creates folder in destDirPath. Parent
// directories till user filecache folder is created here.
if (rsrc.getVisibility() == LocalResourceVisibility.PRIVATE) {
createParentDirs(destDirPath);
}
diskValidator.checkStatus(new File(destDirPath.toUri().getRawPath()));
return new FSDownloadWrapper(lfs, ugi, conf, destDirPath, rsrc);
}
private void createParentDirs(Path destDirPath) throws IOException {
Path parent = destDirPath.getParent();
Path cacheRoot = LocalCacheDirectoryManager.getCacheDirectoryRoot(parent);
Stack<Path> dirs = new Stack<Path>();
while (!parent.equals(cacheRoot)) {
dirs.push(parent);
parent = parent.getParent();
}
// Create directories with user cache permission
while (!dirs.isEmpty()) {
createDir(lfs, dirs.pop(), USERCACHE_FOLDER_PERMS);
}
}
static long getEstimatedSize(LocalResource rsrc) {
@ -455,21 +478,21 @@ public class ContainerLocalizer {
// $x/usercache/$user/filecache
Path userFileCacheDir = new Path(base, FILECACHE);
usersFileCacheDirs[i] = userFileCacheDir.toString();
createDir(lfs, userFileCacheDir, FILECACHE_PERMS, false);
createDir(lfs, userFileCacheDir, FILECACHE_PERMS);
// $x/usercache/$user/appcache/$appId
Path appBase = new Path(base, new Path(APPCACHE, appId));
// $x/usercache/$user/appcache/$appId/filecache
Path appFileCacheDir = new Path(appBase, FILECACHE);
appsFileCacheDirs[i] = appFileCacheDir.toString();
createDir(lfs, appFileCacheDir, FILECACHE_PERMS, false);
createDir(lfs, appFileCacheDir, FILECACHE_PERMS);
}
conf.setStrings(String.format(APPCACHE_CTXT_FMT, appId), appsFileCacheDirs);
conf.setStrings(String.format(USERCACHE_CTXT_FMT, user), usersFileCacheDirs);
}
private static void createDir(FileContext lfs, Path dirPath,
FsPermission perms, boolean createParent) throws IOException {
lfs.mkdir(dirPath, perms, createParent);
FsPermission perms) throws IOException {
lfs.mkdir(dirPath, perms, false);
if (!perms.equals(perms.applyUMask(lfs.getUMask()))) {
lfs.setPermission(dirPath, perms);
}

View File

@ -38,6 +38,7 @@ import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
@ -50,11 +51,12 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import com.google.common.base.Supplier;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.AbstractFileSystem;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
@ -80,7 +82,7 @@ import org.apache.hadoop.yarn.server.nodemanager.api.ResourceLocalizationSpec;
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalResourceStatus;
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerAction;
import org.apache.hadoop.yarn.server.nodemanager.api.protocolrecords.LocalizerStatus;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Test;
import org.mockito.ArgumentMatcher;
@ -88,12 +90,15 @@ import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import com.google.common.base.Supplier;
public class TestContainerLocalizer {
static final Log LOG = LogFactory.getLog(TestContainerLocalizer.class);
static final Path basedir =
new Path("target", TestContainerLocalizer.class.getName());
static final FsPermission CACHE_DIR_PERM = new FsPermission((short)0710);
static final FsPermission USERCACHE_DIR_PERM = new FsPermission((short) 0755);
static final String appUser = "yak";
static final String appId = "app_RM_0";
@ -101,6 +106,10 @@ public class TestContainerLocalizer {
static final InetSocketAddress nmAddr =
new InetSocketAddress("foobar", 8040);
@After
public void cleanUp() throws IOException {
FileUtils.deleteDirectory(new File(basedir.toUri().getRawPath()));
}
@Test
public void testMain() throws Exception {
@ -635,4 +644,34 @@ static DataInputBuffer createFakeCredentials(Random r, int nTok)
return ret;
}
@Test(timeout = 10000)
public void testUserCacheDirPermission() throws Exception {
Configuration conf = new Configuration();
conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077");
FileContext lfs = FileContext.getLocalFSFileContext(conf);
Path fileCacheDir = lfs.makeQualified(new Path(basedir, "filecache"));
lfs.mkdir(fileCacheDir, FsPermission.getDefault(), true);
RecordFactory recordFactory = mock(RecordFactory.class);
ContainerLocalizer localizer = new ContainerLocalizer(lfs,
UserGroupInformation.getCurrentUser().getUserName(), "application_01",
"container_01", new ArrayList<Path>(), recordFactory);
LocalResource rsrc = mock(LocalResource.class);
when(rsrc.getVisibility()).thenReturn(LocalResourceVisibility.PRIVATE);
Path destDirPath = new Path(fileCacheDir, "0/0/85");
//create one of the parent directories with the wrong permissions first
FsPermission wrongPerm = new FsPermission((short) 0700);
lfs.mkdir(destDirPath.getParent().getParent(), wrongPerm, false);
lfs.mkdir(destDirPath.getParent(), wrongPerm, false);
//Localize and check the directory permission are correct.
localizer
.download(destDirPath, rsrc, UserGroupInformation.getCurrentUser());
Assert
.assertEquals("Cache directory permissions filecache/0/0 is incorrect",
USERCACHE_DIR_PERM,
lfs.getFileStatus(destDirPath.getParent()).getPermission());
Assert.assertEquals("Cache directory permissions filecache/0 is incorrect",
USERCACHE_DIR_PERM,
lfs.getFileStatus(destDirPath.getParent().getParent()).getPermission());
}
}