From 735b50e8bd23f7fbeff3a08cf8f3fff8cbff7449 Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Tue, 31 Jul 2012 19:20:03 +0000 Subject: [PATCH] MAPREDUCE-4493. Distibuted Cache Compatability Issues (Robert Evans via tgraves) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1367713 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/site/apt/DeprecatedProperties.apt.vm | 4 ++- hadoop-mapreduce-project/CHANGES.txt | 3 ++ .../mapred/LocalDistributedCacheManager.java | 9 ++--- .../TestLocalDistributedCacheManager.java | 3 -- .../mapred/TestMRWithDistributedCache.java | 1 - .../hadoop/filecache/DistributedCache.java | 11 +++--- .../apache/hadoop/mapred/pipes/Submitter.java | 1 - .../java/org/apache/hadoop/mapreduce/Job.java | 5 +-- .../apache/hadoop/mapreduce/JobContext.java | 15 ++++++-- .../apache/hadoop/mapreduce/JobSubmitter.java | 2 -- .../apache/hadoop/mapreduce/MRJobConfig.java | 4 +++ .../mapreduce/filecache/DistributedCache.java | 35 +++++++++--------- .../hadoop/mapreduce/util/ConfigUtil.java | 2 -- .../org/apache/hadoop/mapred/MRCaching.java | 36 +++++-------------- .../hadoop/mapred/TestMiniMRDFSCaching.java | 4 +-- .../hadoop/mapreduce/v2/TestMRJobs.java | 2 +- .../v2/TestSpeculativeExecution.java | 1 - .../java/org/apache/hadoop/examples/Sort.java | 1 - .../hadoop/examples/terasort/TeraSort.java | 3 +- .../apache/hadoop/streaming/StreamJob.java | 1 - 20 files changed, 63 insertions(+), 80 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm index c391587db17..dcb0be7bd3b 100644 --- a/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm +++ b/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm @@ -196,7 +196,9 @@ Deprecated Properties *---+---+ |mapred.compress.map.output | mapreduce.map.output.compress *---+---+ -|mapred.create.symlink | 
mapreduce.job.cache.symlink.create +|mapred.create.symlink | NONE - symlinking is always on +*---+---+ +|mapreduce.job.cache.symlink.create | NONE - symlinking is always on *---+---+ |mapred.data.field.separator | mapreduce.fieldsel.data.field.separator *---+---+ diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index dd812f94fb3..9f83acd5870 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -771,6 +771,9 @@ Release 0.23.3 - UNRELEASED MAPREDUCE-4496. AM logs link is missing user name (Jason Lowe via bobby) + MAPREDUCE-4493. Distibuted Cache Compatability Issues (Robert Evans + via tgraves) + Release 0.23.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java index 85cef6e2a6c..fb26245cd8e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java @@ -84,7 +84,6 @@ class LocalDistributedCacheManager { * @throws IOException */ public void setup(JobConf conf) throws IOException { - boolean mkLinks = DistributedCache.getSymlink(conf); File workDir = new File(System.getProperty("user.dir")); // Generate YARN local resources objects corresponding to the distributed @@ -145,11 +144,9 @@ class LocalDistributedCacheManager { throw new IOException(e); } String pathString = path.toUri().toString(); - if(mkLinks) { - String link = entry.getKey(); - String target = new File(path.toUri()).getPath(); - symlink(workDir, target, link); - } 
+ String link = entry.getKey(); + String target = new File(path.toUri()).getPath(); + symlink(workDir, target, link); if (resource.getType() == LocalResourceType.ARCHIVE) { localArchives.add(pathString); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java index 368ac2bceda..b131473d63d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java @@ -150,7 +150,6 @@ public class TestLocalDistributedCacheManager { conf.set(MRJobConfig.CACHE_FILES_SIZES, "201"); conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "false"); conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath()); - conf.set(MRJobConfig.CACHE_SYMLINK, "yes"); LocalDistributedCacheManager manager = new LocalDistributedCacheManager(); try { manager.setup(conf); @@ -197,7 +196,6 @@ public class TestLocalDistributedCacheManager { conf.set(MRJobConfig.CACHE_FILES, ""); conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath()); - conf.set(MRJobConfig.CACHE_SYMLINK, "yes"); LocalDistributedCacheManager manager = new LocalDistributedCacheManager(); try { manager.setup(conf); @@ -268,7 +266,6 @@ public class TestLocalDistributedCacheManager { conf.set(MRJobConfig.CACHE_FILES_SIZES, "201,201"); conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "false,false"); conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath()); - conf.set(MRJobConfig.CACHE_SYMLINK, "yes"); LocalDistributedCacheManager manager = new LocalDistributedCacheManager(); try { manager.setup(conf); diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java index c9ce7cba9b7..b704c77c96a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java @@ -146,7 +146,6 @@ public class TestMRWithDistributedCache extends TestCase { job.addFileToClassPath(second); job.addArchiveToClassPath(third); job.addCacheArchive(fourth.toUri()); - job.createSymlink(); job.setMaxMapAttempts(1); // speed up failures job.submit(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java index 3b9424a81c9..0e7438fa4e6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java @@ -48,8 +48,12 @@ import org.apache.hadoop.mapreduce.Job; * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. * Jars may be optionally added to the classpath of the tasks, a rudimentary * software distribution mechanism. Files have execution permissions. - * Optionally users can also direct it to symlink the distributed cache file(s) - * into the working directory of the task.

+ * In older versions of Hadoop Map/Reduce, users could optionally ask for symlinks + * to be created in the working directory of the child task. In the current + * version, symlinks are always created. If the URL does not have a fragment, + * the name of the file or directory will be used. If multiple files or + * directories map to the same link name, the last one added will be used. All + * others will not even be downloaded.

* *

DistributedCache tracks modification timestamps of the cache * files. Clearly the cache files should not be modified by the application @@ -91,8 +95,7 @@ import org.apache.hadoop.mapreduce.Job; * * public void configure(JobConf job) { * // Get the cached archives/files - * localArchives = DistributedCache.getLocalCacheArchives(job); - * localFiles = DistributedCache.getLocalCacheFiles(job); + * File f = new File("./map.zip/some/file/in/zip.txt"); * } * * public void map(K key, V value, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java index ddc3ae80172..57370872e30 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java @@ -313,7 +313,6 @@ public class Submitter extends Configured implements Tool { // add default debug script only when executable is expressed as // # if (exec.contains("#")) { - DistributedCache.createSymlink(conf); // set default gdb commands for map and reduce task String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script"; setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT,defScript); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java index 2fd666e8272..a2a59005b9d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java @@ -1049,9 +1049,10 @@ public class Job extends JobContextImpl implements JobContext { } /** - * This method allows you to create symlinks in the current working directory - * of the task to all the cache files/archives + * Originally intended to enable symlinks, but currently symlinks cannot be + * disabled. */ + @Deprecated public void createSymlink() { ensureState(JobState.DEFINE); DistributedCache.createSymlink(conf); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java index 39325d1c112..4842e20b9c4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java @@ -221,10 +221,11 @@ public interface JobContext extends MRJobConfig { public String getUser(); /** - * This method checks to see if symlinks are to be create for the - * localized cache files in the current working directory - * @return true if symlinks are to be created- else return false + * Originally intended to check if symlinks should be used, but currently + * symlinks cannot be disabled. + * @return true */ + @Deprecated public boolean getSymlink(); /** @@ -251,14 +252,22 @@ public interface JobContext extends MRJobConfig { * Return the path array of the localized caches * @return A path array of localized caches * @throws IOException + * @deprecated the array returned only includes the items the were + * downloaded. There is no way to map this to what is returned by + * {@link #getCacheArchives()}. 
*/ + @Deprecated public Path[] getLocalCacheArchives() throws IOException; /** * Return the path array of the localized files * @return A path array of localized files * @throws IOException + * @deprecated the array returned only includes the items the were + * downloaded. There is no way to map this to what is returned by + * {@link #getCacheFiles()}. */ + @Deprecated public Path[] getLocalCacheFiles() throws IOException; /** diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java index 9c271a02762..08a09c2a69d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java @@ -190,7 +190,6 @@ class JobSubmitter { //should not throw a uri exception throw new IOException("Failed to create uri for " + tmpFile, ue); } - DistributedCache.createSymlink(conf); } } @@ -225,7 +224,6 @@ class JobSubmitter { //should not throw an uri excpetion throw new IOException("Failed to create uri for " + tmpArchives, ue); } - DistributedCache.createSymlink(conf); } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 03efd7e9bf3..bbc8a63173b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -114,6 +114,10 @@ public interface MRJobConfig { public static final String CACHE_ARCHIVES_VISIBILITIES = "mapreduce.job.cache.archives.visibilities"; + /** + * @deprecated Symlinks are always on and cannot be disabled. + */ + @Deprecated public static final String CACHE_SYMLINK = "mapreduce.job.cache.symlink.create"; public static final String USER_LOG_RETAIN_HOURS = "mapreduce.job.userlog.retain.hours"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java index 40403581421..903ad085c70 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java @@ -55,8 +55,12 @@ import java.net.URI; * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. * Jars may be optionally added to the classpath of the tasks, a rudimentary * software distribution mechanism. Files have execution permissions. - * Optionally users can also direct it to symlink the distributed cache file(s) - * into the working directory of the task.

+ * In older versions of Hadoop Map/Reduce, users could optionally ask for symlinks + * to be created in the working directory of the child task. In the current + * version, symlinks are always created. If the URL does not have a fragment, + * the name of the file or directory will be used. If multiple files or + * directories map to the same link name, the last one added will be used. All + * others will not even be downloaded.

* *

DistributedCache tracks modification timestamps of the cache * files. Clearly the cache files should not be modified by the application @@ -98,8 +102,7 @@ import java.net.URI; * * public void configure(JobConf job) { * // Get the cached archives/files - * localArchives = DistributedCache.getLocalCacheArchives(job); - * localFiles = DistributedCache.getLocalCacheFiles(job); + * File f = new File("./map.zip/some/file/in/zip.txt"); * } * * public void map(K key, V value, @@ -375,32 +378,26 @@ public class DistributedCache { } /** - * This method allows you to create symlinks in the current working directory - * of the task to all the cache files/archives. - * Intended to be used by user code. + * Originally intended to enable symlinks, but currently symlinks cannot be + * disabled. This is a NO-OP. * @param conf the jobconf - * @deprecated Use {@link Job#createSymlink()} instead + * @deprecated This is a NO-OP. */ @Deprecated public static void createSymlink(Configuration conf){ - conf.set(MRJobConfig.CACHE_SYMLINK, "yes"); + //NOOP } /** - * This method checks to see if symlinks are to be create for the - * localized cache files in the current working directory - * Used by internal DistributedCache code. + * Originally intended to check if symlinks should be used, but currently + * symlinks cannot be disabled. * @param conf the jobconf - * @return true if symlinks are to be created- else return false - * @deprecated Use {@link JobContext#getSymlink()} instead + * @return true + * @deprecated symlinks are always created. 
*/ @Deprecated public static boolean getSymlink(Configuration conf){ - String result = conf.get(MRJobConfig.CACHE_SYMLINK); - if ("yes".equals(result)){ - return true; - } - return false; + return true; } private static boolean[] parseBooleans(String[] strs) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java index fde6aa4647f..f5dbc3a7f1a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java @@ -246,8 +246,6 @@ public class ConfigUtil { new String[] {MRJobConfig.CACHE_FILE_TIMESTAMPS}); Configuration.addDeprecation("mapred.cache.archives.timestamps", new String[] {MRJobConfig.CACHE_ARCHIVES_TIMESTAMPS}); - Configuration.addDeprecation("mapred.create.symlink", - new String[] {MRJobConfig.CACHE_SYMLINK}); Configuration.addDeprecation("mapred.working.dir", new String[] {MRJobConfig.WORKING_DIR}); Configuration.addDeprecation("user.name", diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java index 02b6e58d0d2..2d5b8ccb096 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java @@ -210,19 +210,10 @@ public class MRCaching 
{ fs.copyFromLocalFile(tarPath1, cachePath); fs.copyFromLocalFile(tarPath2, cachePath); } - - public static TestResult launchMRCache(String indir, - String outdir, String cacheDir, - JobConf conf, String input) - throws IOException { - setupCache(cacheDir, FileSystem.get(conf)); - return launchMRCache(indir,outdir, cacheDir, conf, input, false); - } public static TestResult launchMRCache(String indir, String outdir, String cacheDir, - JobConf conf, String input, - boolean withSymlink) + JobConf conf, String input) throws IOException { String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data","/tmp")) .toString().replace(' ', '+'); @@ -256,24 +247,13 @@ public class MRCaching { conf.setNumReduceTasks(1); conf.setSpeculativeExecution(false); URI[] uris = new URI[6]; - if (!withSymlink) { - conf.setMapperClass(MRCaching.MapClass.class); - uris[0] = fs.getUri().resolve(cacheDir + "/test.txt"); - uris[1] = fs.getUri().resolve(cacheDir + "/test.jar"); - uris[2] = fs.getUri().resolve(cacheDir + "/test.zip"); - uris[3] = fs.getUri().resolve(cacheDir + "/test.tgz"); - uris[4] = fs.getUri().resolve(cacheDir + "/test.tar.gz"); - uris[5] = fs.getUri().resolve(cacheDir + "/test.tar"); - } else { - DistributedCache.createSymlink(conf); - conf.setMapperClass(MRCaching.MapClass2.class); - uris[0] = fs.getUri().resolve(cacheDir + "/test.txt#" + "test.txt"); - uris[1] = fs.getUri().resolve(cacheDir + "/test.jar#" + "testjar"); - uris[2] = fs.getUri().resolve(cacheDir + "/test.zip#" + "testzip"); - uris[3] = fs.getUri().resolve(cacheDir + "/test.tgz#" + "testtgz"); - uris[4] = fs.getUri().resolve(cacheDir + "/test.tar.gz#" + "testtargz"); - uris[5] = fs.getUri().resolve(cacheDir + "/test.tar#" + "testtar"); - } + conf.setMapperClass(MRCaching.MapClass2.class); + uris[0] = fs.getUri().resolve(cacheDir + "/test.txt"); + uris[1] = fs.getUri().resolve(cacheDir + "/test.jar"); + uris[2] = fs.getUri().resolve(cacheDir + "/test.zip"); + uris[3] = fs.getUri().resolve(cacheDir + 
"/test.tgz"); + uris[4] = fs.getUri().resolve(cacheDir + "/test.tar.gz"); + uris[5] = fs.getUri().resolve(cacheDir + "/test.tar"); DistributedCache.addCacheFile(uris[0], conf); // Save expected file sizes diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java index 6e8abd73a06..cda200e97e5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java @@ -48,7 +48,7 @@ public class TestMiniMRDFSCaching extends TestCase { "/cachedir", mr.createJobConf(), "The quick brown fox\nhas many silly\n" - + "red fox sox\n", false); + + "red fox sox\n"); assertTrue("Archives not matching", ret.isOutputOk); // launch MR cache with symlinks ret = MRCaching.launchMRCache("/testing/wc/input", @@ -56,7 +56,7 @@ public class TestMiniMRDFSCaching extends TestCase { "/cachedir", mr.createJobConf(), "The quick brown fox\nhas many silly\n" - + "red fox sox\n", true); + + "red fox sox\n"); assertTrue("Archives not matching", ret.isOutputOk); } finally { if (fileSys != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java index 59b8689b20d..7b6ecd25560 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java @@ -211,6 +211,7 @@ public class TestMRJobs { Path outputDir = new Path(mrCluster.getTestWorkDir().getAbsolutePath(), "random-output"); FileOutputFormat.setOutputPath(job, outputDir); + job.setSpeculativeExecution(false); job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.setJarByClass(RandomTextWriterJob.class); job.setMaxMapAttempts(1); // speed up failures @@ -462,7 +463,6 @@ public class TestMRJobs { job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. job.addArchiveToClassPath(third); job.addCacheArchive(fourth.toUri()); - job.createSymlink(); job.setMaxMapAttempts(1); // speed up failures job.submit(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java index 90f7cafc632..17d82d6dacc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java @@ -301,7 +301,6 @@ public class TestSpeculativeExecution { // Creates the Job Configuration job.addFileToClassPath(APP_JAR); // The AppMaster jar itself. 
- job.createSymlink(); job.setMaxMapAttempts(2); job.submit(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java index 901934bbf98..5184bf0a58a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java @@ -167,7 +167,6 @@ public class Sort extends Configured implements Tool { URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning"); DistributedCache.addCacheFile(partitionUri, conf); - DistributedCache.createSymlink(conf); } System.out.println("Running on " + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java index 7daa3016c21..09ab4b7e530 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java @@ -305,8 +305,7 @@ public class TeraSort extends Configured implements Tool { LOG.error(e.getMessage()); return -1; } - job.addCacheFile(partitionUri); - job.createSymlink(); + job.addCacheFile(partitionUri); long end = System.currentTimeMillis(); System.out.println("Spent " + (end - start) + "ms computing partitions."); job.setPartitionerClass(TotalOrderPartitioner.class); diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index b014cde6225..7076b94547c 100644 --- 
a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -958,7 +958,6 @@ public class StreamJob implements Tool { if (!b) fail(LINK_URI); } - DistributedCache.createSymlink(jobConf_); // set the jobconf for the caching parameters if (cacheArchives != null) DistributedCache.setCacheArchives(archiveURIs, jobConf_);