YARN-112. Fixed a race condition during localization that fails containers. Contributed by Omkar Vinit Joshi.

MAPREDUCE-5138. Fix LocalDistributedCacheManager after YARN-112. Contributed by Omkar Vinit Joshi.


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1466196 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-04-09 19:56:10 +00:00
parent d9593621b5
commit 3a54a5653b
10 changed files with 98 additions and 34 deletions

View File

@ -225,6 +225,9 @@ Release 2.0.4-beta - UNRELEASED
MAPREDUCE-5088. MR Client gets an renewer token exception while Oozie is MAPREDUCE-5088. MR Client gets an renewer token exception while Oozie is
submitting a job (Daryn Sharp via cos) submitting a job (Daryn Sharp via cos)
MAPREDUCE-5138. Fix LocalDistributedCacheManager after YARN-112. (Omkar Vinit
Joshi via vinodkv)
Release 2.0.3-alpha - 2013-02-06 Release 2.0.3-alpha - 2013-02-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -32,13 +32,13 @@
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory; import java.util.concurrent.ThreadFactory;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -91,6 +91,9 @@ public void setup(JobConf conf) throws IOException {
Map<String, LocalResource> localResources = Map<String, LocalResource> localResources =
new LinkedHashMap<String, LocalResource>(); new LinkedHashMap<String, LocalResource>();
MRApps.setupDistributedCache(conf, localResources); MRApps.setupDistributedCache(conf, localResources);
// Generating unique numbers for FSDownload.
AtomicLong uniqueNumberGenerator =
new AtomicLong(System.currentTimeMillis());
// Find which resources are to be put on the local classpath // Find which resources are to be put on the local classpath
Map<String, Path> classpaths = new HashMap<String, Path>(); Map<String, Path> classpaths = new HashMap<String, Path>();
@ -128,8 +131,10 @@ public void setup(JobConf conf) throws IOException {
Path destPath = localDirAllocator.getLocalPathForWrite(".", conf); Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap(); Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
for (LocalResource resource : localResources.values()) { for (LocalResource resource : localResources.values()) {
Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf, Callable<Path> download =
destPath, resource, new Random()); new FSDownload(localFSFileContext, ugi, conf, new Path(destPath,
Long.toString(uniqueNumberGenerator.incrementAndGet())),
resource);
Future<Path> future = exec.submit(download); Future<Path> future = exec.submit(download);
resourcesToPaths.put(resource, future); resourcesToPaths.put(resource, future);
} }

View File

@ -208,6 +208,9 @@ Release 2.0.5-beta - UNRELEASED
local directory hits unix file count limits and thus prevent job failures. local directory hits unix file count limits and thus prevent job failures.
(Omkar Vinit Joshi via vinodkv) (Omkar Vinit Joshi via vinodkv)
YARN-112. Fixed a race condition during localization that fails containers.
(Omkar Vinit Joshi via vinodkv)
Release 2.0.4-alpha - UNRELEASED Release 2.0.4-alpha - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -23,7 +23,6 @@
import java.io.IOException; import java.io.IOException;
import java.net.URISyntaxException; import java.net.URISyntaxException;
import java.security.PrivilegedExceptionAction; import java.security.PrivilegedExceptionAction;
import java.util.Random;
import java.util.concurrent.Callable; import java.util.concurrent.Callable;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -36,13 +35,12 @@
import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.RunJar; import org.apache.hadoop.util.RunJar;
import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.util.ConverterUtils;
/** /**
* Download a single URL to the local disk. * Download a single URL to the local disk.
@ -51,8 +49,7 @@
public class FSDownload implements Callable<Path> { public class FSDownload implements Callable<Path> {
private static final Log LOG = LogFactory.getLog(FSDownload.class); private static final Log LOG = LogFactory.getLog(FSDownload.class);
private Random rand;
private FileContext files; private FileContext files;
private final UserGroupInformation userUgi; private final UserGroupInformation userUgi;
private Configuration conf; private Configuration conf;
@ -71,13 +68,12 @@ public class FSDownload implements Callable<Path> {
public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf, public FSDownload(FileContext files, UserGroupInformation ugi, Configuration conf,
Path destDirPath, LocalResource resource, Random rand) { Path destDirPath, LocalResource resource) {
this.conf = conf; this.conf = conf;
this.destDirPath = destDirPath; this.destDirPath = destDirPath;
this.files = files; this.files = files;
this.userUgi = ugi; this.userUgi = ugi;
this.resource = resource; this.resource = resource;
this.rand = rand;
} }
LocalResource getResource() { LocalResource getResource() {
@ -270,11 +266,6 @@ public Path call() throws Exception {
} catch (URISyntaxException e) { } catch (URISyntaxException e) {
throw new IOException("Invalid resource", e); throw new IOException("Invalid resource", e);
} }
Path tmp;
do {
tmp = new Path(destDirPath, String.valueOf(rand.nextLong()));
} while (files.util().exists(tmp));
destDirPath = tmp;
createDir(destDirPath, cachePerms); createDir(destDirPath, cachePerms);
final Path dst_work = new Path(destDirPath + "_tmp"); final Path dst_work = new Path(destDirPath + "_tmp");
createDir(dst_work, cachePerms); createDir(dst_work, cachePerms);
@ -305,8 +296,6 @@ public Path run() throws Exception {
files.delete(dst_work, true); files.delete(dst_work, true);
} catch (FileNotFoundException ignore) { } catch (FileNotFoundException ignore) {
} }
// clear ref to internal var
rand = null;
conf = null; conf = null;
resource = null; resource = null;
} }

View File

@ -35,6 +35,7 @@
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.Future; import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.jar.JarOutputStream; import java.util.jar.JarOutputStream;
import java.util.jar.Manifest; import java.util.jar.Manifest;
@ -66,6 +67,8 @@
public class TestFSDownload { public class TestFSDownload {
private static final Log LOG = LogFactory.getLog(TestFSDownload.class); private static final Log LOG = LogFactory.getLog(TestFSDownload.class);
private static AtomicLong uniqueNumberGenerator =
new AtomicLong(System.currentTimeMillis());
@AfterClass @AfterClass
public static void deleteTestDir() throws IOException { public static void deleteTestDir() throws IOException {
@ -267,9 +270,11 @@ public void testDownloadBadPublic() throws IOException, URISyntaxException,
rsrcVis.put(rsrc, vis); rsrcVis.put(rsrc, vis);
Path destPath = dirs.getLocalPathForWrite( Path destPath = dirs.getLocalPathForWrite(
basedir.toString(), size, conf); basedir.toString(), size, conf);
destPath = new Path (destPath,
Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsd = FSDownload fsd =
new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
destPath, rsrc, new Random(sharedSeed)); destPath, rsrc);
pending.put(rsrc, exec.submit(fsd)); pending.put(rsrc, exec.submit(fsd));
try { try {
@ -320,9 +325,11 @@ public void testDownload() throws IOException, URISyntaxException,
rsrcVis.put(rsrc, vis); rsrcVis.put(rsrc, vis);
Path destPath = dirs.getLocalPathForWrite( Path destPath = dirs.getLocalPathForWrite(
basedir.toString(), sizes[i], conf); basedir.toString(), sizes[i], conf);
destPath = new Path (destPath,
Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsd = FSDownload fsd =
new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
destPath, rsrc, new Random(sharedSeed)); destPath, rsrc);
pending.put(rsrc, exec.submit(fsd)); pending.put(rsrc, exec.submit(fsd));
} }
@ -380,9 +387,10 @@ public void testDownloadArchive() throws IOException, URISyntaxException,
Path p = new Path(basedir, "" + 1); Path p = new Path(basedir, "" + 1);
LocalResource rsrc = createTarFile(files, p, size, rand, vis); LocalResource rsrc = createTarFile(files, p, size, rand, vis);
Path destPath = dirs.getLocalPathForWrite(basedir.toString(), size, conf); Path destPath = dirs.getLocalPathForWrite(basedir.toString(), size, conf);
destPath = new Path (destPath,
Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsd = new FSDownload(files, FSDownload fsd = new FSDownload(files,
UserGroupInformation.getCurrentUser(), conf, destPath, rsrc, UserGroupInformation.getCurrentUser(), conf, destPath, rsrc);
new Random(sharedSeed));
pending.put(rsrc, exec.submit(fsd)); pending.put(rsrc, exec.submit(fsd));
try { try {
@ -437,9 +445,10 @@ public void testDownloadPatternJar() throws IOException, URISyntaxException,
LocalResource rsrcjar = createJarFile(files, p, size, rand, vis); LocalResource rsrcjar = createJarFile(files, p, size, rand, vis);
rsrcjar.setType(LocalResourceType.PATTERN); rsrcjar.setType(LocalResourceType.PATTERN);
Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf); Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf);
destPathjar = new Path (destPathjar,
Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsdjar = new FSDownload(files, FSDownload fsdjar = new FSDownload(files,
UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrcjar, UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrcjar);
new Random(sharedSeed));
pending.put(rsrcjar, exec.submit(fsdjar)); pending.put(rsrcjar, exec.submit(fsdjar));
try { try {
@ -493,9 +502,10 @@ public void testDownloadArchiveZip() throws IOException, URISyntaxException,
Path p = new Path(basedir, "" + 1); Path p = new Path(basedir, "" + 1);
LocalResource rsrczip = createZipFile(files, p, size, rand, vis); LocalResource rsrczip = createZipFile(files, p, size, rand, vis);
Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf); Path destPathjar = dirs.getLocalPathForWrite(basedir.toString(), size, conf);
destPathjar = new Path (destPathjar,
Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsdzip = new FSDownload(files, FSDownload fsdzip = new FSDownload(files,
UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrczip, UserGroupInformation.getCurrentUser(), conf, destPathjar, rsrczip);
new Random(sharedSeed));
pending.put(rsrczip, exec.submit(fsdzip)); pending.put(rsrczip, exec.submit(fsdzip));
try { try {
@ -586,9 +596,11 @@ public void testDirDownload() throws IOException, InterruptedException {
rsrcVis.put(rsrc, vis); rsrcVis.put(rsrc, vis);
Path destPath = dirs.getLocalPathForWrite( Path destPath = dirs.getLocalPathForWrite(
basedir.toString(), conf); basedir.toString(), conf);
destPath = new Path (destPath,
Long.toString(uniqueNumberGenerator.incrementAndGet()));
FSDownload fsd = FSDownload fsd =
new FSDownload(files, UserGroupInformation.getCurrentUser(), conf, new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
destPath, rsrc, new Random(sharedSeed)); destPath, rsrc);
pending.put(rsrc, exec.submit(fsd)); pending.put(rsrc, exec.submit(fsd));
} }
@ -614,4 +626,38 @@ public void testDirDownload() throws IOException, InterruptedException {
} }
}
@Test(timeout = 1000)
public void testUniqueDestinationPath() throws Exception {
Configuration conf = new Configuration();
FileContext files = FileContext.getLocalFSFileContext(conf);
final Path basedir = files.makeQualified(new Path("target",
TestFSDownload.class.getSimpleName()));
files.mkdir(basedir, null, true);
conf.setStrings(TestFSDownload.class.getName(), basedir.toString());
ExecutorService singleThreadedExec = Executors.newSingleThreadExecutor();
LocalDirAllocator dirs =
new LocalDirAllocator(TestFSDownload.class.getName());
Path destPath = dirs.getLocalPathForWrite(basedir.toString(), conf);
destPath =
new Path(destPath, Long.toString(uniqueNumberGenerator
.incrementAndGet()));
try {
Path p = new Path(basedir, "dir" + 0 + ".jar");
LocalResourceVisibility vis = LocalResourceVisibility.PRIVATE;
LocalResource rsrc = createJar(files, p, vis);
FSDownload fsd =
new FSDownload(files, UserGroupInformation.getCurrentUser(), conf,
destPath, rsrc);
Future<Path> rPath = singleThreadedExec.submit(fsd);
// Now FSDownload will not create a random directory to localize the
// resource. Therefore the final localizedPath for the resource should be
// destination directory (passed as an argument) + file name.
Assert.assertEquals(destPath, rPath.get().getParent());
} finally {
singleThreadedExec.shutdown();
}
}
}

View File

@ -198,7 +198,7 @@ CompletionService<Path> createCompletionService(ExecutorService exec) {
Callable<Path> download(Path path, LocalResource rsrc, Callable<Path> download(Path path, LocalResource rsrc,
UserGroupInformation ugi) throws IOException { UserGroupInformation ugi) throws IOException {
DiskChecker.checkDir(new File(path.toUri().getRawPath())); DiskChecker.checkDir(new File(path.toUri().getRawPath()));
return new FSDownload(lfs, ugi, conf, path, rsrc, new Random()); return new FSDownload(lfs, ugi, conf, path, rsrc);
} }
static long getEstimatedSize(LocalResource rsrc) { static long getEstimatedSize(LocalResource rsrc) {

View File

@ -43,4 +43,5 @@ interface LocalResourcesTracker
// TODO: Remove this in favour of EventHandler.handle // TODO: Remove this in favour of EventHandler.handle
void localizationCompleted(LocalResourceRequest req, boolean success); void localizationCompleted(LocalResourceRequest req, boolean success);
long nextUniqueNumber();
} }

View File

@ -21,6 +21,7 @@
import java.util.Iterator; import java.util.Iterator;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -66,6 +67,12 @@ class LocalResourcesTrackerImpl implements LocalResourcesTracker {
*/ */
private ConcurrentHashMap<LocalResourceRequest, Path> private ConcurrentHashMap<LocalResourceRequest, Path>
inProgressLocalResourcesMap; inProgressLocalResourcesMap;
/*
* starting with 10 to accommodate 0-9 directories created as a part of
* LocalCacheDirectoryManager. So there will be one unique number generator
* per APPLICATION, USER and PUBLIC cache.
*/
private AtomicLong uniqueNumberGenerator = new AtomicLong(9);
public LocalResourcesTrackerImpl(String user, Dispatcher dispatcher, public LocalResourcesTrackerImpl(String user, Dispatcher dispatcher,
boolean useLocalCacheDirectoryManager, Configuration conf) { boolean useLocalCacheDirectoryManager, Configuration conf) {
@ -283,4 +290,9 @@ public void localizationCompleted(LocalResourceRequest req,
} }
} }
} }
@Override
public long nextUniqueNumber() {
return uniqueNumberGenerator.incrementAndGet();
}
} }

View File

@ -34,7 +34,6 @@
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random;
import java.util.concurrent.CancellationException; import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletionService; import java.util.concurrent.CompletionService;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
@ -666,8 +665,11 @@ public void addResource(LocalizerResourceRequestEvent request) {
DiskChecker.checkDir( DiskChecker.checkDir(
new File(publicDirDestPath.toUri().getPath())); new File(publicDirDestPath.toUri().getPath()));
} }
publicDirDestPath =
new Path(publicDirDestPath, Long.toString(publicRsrc
.nextUniqueNumber()));
pending.put(queue.submit(new FSDownload( pending.put(queue.submit(new FSDownload(
lfs, null, conf, publicDirDestPath, resource, new Random())), lfs, null, conf, publicDirDestPath, resource)),
request); request);
attempts.put(key, new LinkedList<LocalizerResourceRequestEvent>()); attempts.put(key, new LinkedList<LocalizerResourceRequestEvent>());
} catch (IOException e) { } catch (IOException e) {
@ -955,9 +957,9 @@ private Path getPathForLocalization(LocalResource rsrc) throws IOException,
Path dirPath = Path dirPath =
dirsHandler.getLocalPathForWrite(cacheDirectory, dirsHandler.getLocalPathForWrite(cacheDirectory,
ContainerLocalizer.getEstimatedSize(rsrc), false); ContainerLocalizer.getEstimatedSize(rsrc), false);
return tracker.getPathForLocalization(new LocalResourceRequest(rsrc), dirPath = tracker.getPathForLocalization(new LocalResourceRequest(rsrc),
dirPath); dirPath);
return new Path (dirPath, Long.toString(tracker.nextUniqueNumber()));
} }
@Override @Override

View File

@ -520,7 +520,10 @@ public boolean matches(Object o) {
new LocalResourceRequest(response.getResourceSpecs().get(0).getResource())); new LocalResourceRequest(response.getResourceSpecs().get(0).getResource()));
URL localizedPath = URL localizedPath =
response.getResourceSpecs().get(0).getDestinationDirectory(); response.getResourceSpecs().get(0).getDestinationDirectory();
assertTrue(localizedPath.getFile().endsWith(localPath)); // Appending to local path unique number(10) generated as a part of
// LocalResourcesTracker
assertTrue(localizedPath.getFile().endsWith(
localPath + Path.SEPARATOR + "10"));
// get second resource // get second resource
response = spyService.heartbeat(stat); response = spyService.heartbeat(stat);
@ -534,7 +537,7 @@ public boolean matches(Object o) {
// LocalCacheDirectoryManager will be used and we have restricted number // LocalCacheDirectoryManager will be used and we have restricted number
// of files per directory to 1. // of files per directory to 1.
assertTrue(localizedPath.getFile().endsWith( assertTrue(localizedPath.getFile().endsWith(
localPath + Path.SEPARATOR + "0")); localPath + Path.SEPARATOR + "0" + Path.SEPARATOR + "11"));
// empty rsrc // empty rsrc
response = spyService.heartbeat(stat); response = spyService.heartbeat(stat);