From 735b50e8bd23f7fbeff3a08cf8f3fff8cbff7449 Mon Sep 17 00:00:00 2001
From: Thomas Graves
Date: Tue, 31 Jul 2012 19:20:03 +0000
Subject: [PATCH] MAPREDUCE-4493. Distibuted Cache Compatability Issues (Robert
Evans via tgraves)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1367713 13f79535-47bb-0310-9956-ffa450edef68
---
.../src/site/apt/DeprecatedProperties.apt.vm | 4 ++-
hadoop-mapreduce-project/CHANGES.txt | 3 ++
.../mapred/LocalDistributedCacheManager.java | 9 ++---
.../TestLocalDistributedCacheManager.java | 3 --
.../mapred/TestMRWithDistributedCache.java | 1 -
.../hadoop/filecache/DistributedCache.java | 11 +++---
.../apache/hadoop/mapred/pipes/Submitter.java | 1 -
.../java/org/apache/hadoop/mapreduce/Job.java | 5 +--
.../apache/hadoop/mapreduce/JobContext.java | 15 ++++++--
.../apache/hadoop/mapreduce/JobSubmitter.java | 2 --
.../apache/hadoop/mapreduce/MRJobConfig.java | 4 +++
.../mapreduce/filecache/DistributedCache.java | 35 +++++++++---------
.../hadoop/mapreduce/util/ConfigUtil.java | 2 --
.../org/apache/hadoop/mapred/MRCaching.java | 36 +++++--------------
.../hadoop/mapred/TestMiniMRDFSCaching.java | 4 +--
.../hadoop/mapreduce/v2/TestMRJobs.java | 2 +-
.../v2/TestSpeculativeExecution.java | 1 -
.../java/org/apache/hadoop/examples/Sort.java | 1 -
.../hadoop/examples/terasort/TeraSort.java | 3 +-
.../apache/hadoop/streaming/StreamJob.java | 1 -
20 files changed, 63 insertions(+), 80 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm b/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm
index c391587db17..dcb0be7bd3b 100644
--- a/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm
+++ b/hadoop-common-project/hadoop-common/src/site/apt/DeprecatedProperties.apt.vm
@@ -196,7 +196,9 @@ Deprecated Properties
*---+---+
|mapred.compress.map.output | mapreduce.map.output.compress
*---+---+
-|mapred.create.symlink | mapreduce.job.cache.symlink.create
+|mapred.create.symlink | NONE - symlinking is always on
+*---+---+
+|mapreduce.job.cache.symlink.create | NONE - symlinking is always on
*---+---+
|mapred.data.field.separator | mapreduce.fieldsel.data.field.separator
*---+---+
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index dd812f94fb3..9f83acd5870 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -771,6 +771,9 @@ Release 0.23.3 - UNRELEASED
MAPREDUCE-4496. AM logs link is missing user name (Jason Lowe via bobby)
+ MAPREDUCE-4493. Distributed Cache Compatibility Issues (Robert Evans
+ via tgraves)
+
Release 0.23.2 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java
index 85cef6e2a6c..fb26245cd8e 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalDistributedCacheManager.java
@@ -84,7 +84,6 @@ class LocalDistributedCacheManager {
* @throws IOException
*/
public void setup(JobConf conf) throws IOException {
- boolean mkLinks = DistributedCache.getSymlink(conf);
File workDir = new File(System.getProperty("user.dir"));
// Generate YARN local resources objects corresponding to the distributed
@@ -145,11 +144,9 @@ class LocalDistributedCacheManager {
throw new IOException(e);
}
String pathString = path.toUri().toString();
- if(mkLinks) {
- String link = entry.getKey();
- String target = new File(path.toUri()).getPath();
- symlink(workDir, target, link);
- }
+ String link = entry.getKey();
+ String target = new File(path.toUri()).getPath();
+ symlink(workDir, target, link);
if (resource.getType() == LocalResourceType.ARCHIVE) {
localArchives.add(pathString);
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java
index 368ac2bceda..b131473d63d 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalDistributedCacheManager.java
@@ -150,7 +150,6 @@ public class TestLocalDistributedCacheManager {
conf.set(MRJobConfig.CACHE_FILES_SIZES, "201");
conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "false");
conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath());
- conf.set(MRJobConfig.CACHE_SYMLINK, "yes");
LocalDistributedCacheManager manager = new LocalDistributedCacheManager();
try {
manager.setup(conf);
@@ -197,7 +196,6 @@ public class TestLocalDistributedCacheManager {
conf.set(MRJobConfig.CACHE_FILES, "");
conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath());
- conf.set(MRJobConfig.CACHE_SYMLINK, "yes");
LocalDistributedCacheManager manager = new LocalDistributedCacheManager();
try {
manager.setup(conf);
@@ -268,7 +266,6 @@ public class TestLocalDistributedCacheManager {
conf.set(MRJobConfig.CACHE_FILES_SIZES, "201,201");
conf.set(MRJobConfig.CACHE_FILE_VISIBILITIES, "false,false");
conf.set(MRConfig.LOCAL_DIR, localDir.getAbsolutePath());
- conf.set(MRJobConfig.CACHE_SYMLINK, "yes");
LocalDistributedCacheManager manager = new LocalDistributedCacheManager();
try {
manager.setup(conf);
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java
index c9ce7cba9b7..b704c77c96a 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java
@@ -146,7 +146,6 @@ public class TestMRWithDistributedCache extends TestCase {
job.addFileToClassPath(second);
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
- job.createSymlink();
job.setMaxMapAttempts(1); // speed up failures
job.submit();
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
index 3b9424a81c9..0e7438fa4e6 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
@@ -48,8 +48,12 @@ import org.apache.hadoop.mapreduce.Job;
* Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
* Jars may be optionally added to the classpath of the tasks, a rudimentary
* software distribution mechanism. Files have execution permissions.
- * Optionally users can also direct it to symlink the distributed cache file(s)
- * into the working directory of the task.
+ * In older versions of Hadoop Map/Reduce users could optionally ask for
+ * symlinks to be created in the working directory of the child task. In the
+ * current version symlinks are always created. If the URL does not have a
+ * fragment the name of the file or directory will be used. If multiple files
+ * or directories map to the same link name, the last one added will be used.
+ * All others will not even be downloaded.
*
* DistributedCache
tracks modification timestamps of the cache
* files. Clearly the cache files should not be modified by the application
@@ -91,8 +95,7 @@ import org.apache.hadoop.mapreduce.Job;
*
* public void configure(JobConf job) {
* // Get the cached archives/files
- * localArchives = DistributedCache.getLocalCacheArchives(job);
- * localFiles = DistributedCache.getLocalCacheFiles(job);
+ * File f = new File("./map.zip/some/file/in/zip.txt");
* }
*
* public void map(K key, V value,
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java
index ddc3ae80172..57370872e30 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/pipes/Submitter.java
@@ -313,7 +313,6 @@ public class Submitter extends Configured implements Tool {
// add default debug script only when executable is expressed as
// #
if (exec.contains("#")) {
- DistributedCache.createSymlink(conf);
// set default gdb commands for map and reduce task
String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT,defScript);
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
index 2fd666e8272..a2a59005b9d 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java
@@ -1049,9 +1049,10 @@ public class Job extends JobContextImpl implements JobContext {
}
/**
- * This method allows you to create symlinks in the current working directory
- * of the task to all the cache files/archives
+ * Originally intended to enable symlinks, but currently symlinks cannot be
+ * disabled.
*/
+ @Deprecated
public void createSymlink() {
ensureState(JobState.DEFINE);
DistributedCache.createSymlink(conf);
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java
index 39325d1c112..4842e20b9c4 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java
@@ -221,10 +221,11 @@ public interface JobContext extends MRJobConfig {
public String getUser();
/**
- * This method checks to see if symlinks are to be create for the
- * localized cache files in the current working directory
- * @return true if symlinks are to be created- else return false
+ * Originally intended to check if symlinks should be used, but currently
+ * symlinks cannot be disabled.
+ * @return true
*/
+ @Deprecated
public boolean getSymlink();
/**
@@ -251,14 +252,22 @@ public interface JobContext extends MRJobConfig {
* Return the path array of the localized caches
* @return A path array of localized caches
* @throws IOException
+ * @deprecated the array returned only includes the items that were
+ * downloaded. There is no way to map this to what is returned by
+ * {@link #getCacheArchives()}.
*/
+ @Deprecated
public Path[] getLocalCacheArchives() throws IOException;
/**
* Return the path array of the localized files
* @return A path array of localized files
* @throws IOException
+ * @deprecated the array returned only includes the items that were
+ * downloaded. There is no way to map this to what is returned by
+ * {@link #getCacheFiles()}.
*/
+ @Deprecated
public Path[] getLocalCacheFiles() throws IOException;
/**
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
index 9c271a02762..08a09c2a69d 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobSubmitter.java
@@ -190,7 +190,6 @@ class JobSubmitter {
//should not throw a uri exception
throw new IOException("Failed to create uri for " + tmpFile, ue);
}
- DistributedCache.createSymlink(conf);
}
}
@@ -225,7 +224,6 @@ class JobSubmitter {
//should not throw an uri excpetion
throw new IOException("Failed to create uri for " + tmpArchives, ue);
}
- DistributedCache.createSymlink(conf);
}
}
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
index 03efd7e9bf3..bbc8a63173b 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java
@@ -114,6 +114,10 @@ public interface MRJobConfig {
public static final String CACHE_ARCHIVES_VISIBILITIES = "mapreduce.job.cache.archives.visibilities";
+ /**
+ * @deprecated Symlinks are always on and cannot be disabled.
+ */
+ @Deprecated
public static final String CACHE_SYMLINK = "mapreduce.job.cache.symlink.create";
public static final String USER_LOG_RETAIN_HOURS = "mapreduce.job.userlog.retain.hours";
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
index 40403581421..903ad085c70 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
@@ -55,8 +55,12 @@ import java.net.URI;
* Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes.
* Jars may be optionally added to the classpath of the tasks, a rudimentary
* software distribution mechanism. Files have execution permissions.
- * Optionally users can also direct it to symlink the distributed cache file(s)
- * into the working directory of the task.
+ * In older versions of Hadoop Map/Reduce users could optionally ask for
+ * symlinks to be created in the working directory of the child task. In the
+ * current version symlinks are always created. If the URL does not have a
+ * fragment the name of the file or directory will be used. If multiple files
+ * or directories map to the same link name, the last one added will be used.
+ * All others will not even be downloaded.
*
* DistributedCache
tracks modification timestamps of the cache
* files. Clearly the cache files should not be modified by the application
@@ -98,8 +102,7 @@ import java.net.URI;
*
* public void configure(JobConf job) {
* // Get the cached archives/files
- * localArchives = DistributedCache.getLocalCacheArchives(job);
- * localFiles = DistributedCache.getLocalCacheFiles(job);
+ * File f = new File("./map.zip/some/file/in/zip.txt");
* }
*
* public void map(K key, V value,
@@ -375,32 +378,26 @@ public class DistributedCache {
}
/**
- * This method allows you to create symlinks in the current working directory
- * of the task to all the cache files/archives.
- * Intended to be used by user code.
+ * Originally intended to enable symlinks, but currently symlinks cannot be
+ * disabled. This is a NO-OP.
* @param conf the jobconf
- * @deprecated Use {@link Job#createSymlink()} instead
+ * @deprecated This is a NO-OP.
*/
@Deprecated
public static void createSymlink(Configuration conf){
- conf.set(MRJobConfig.CACHE_SYMLINK, "yes");
+ //NOOP
}
/**
- * This method checks to see if symlinks are to be create for the
- * localized cache files in the current working directory
- * Used by internal DistributedCache code.
+ * Originally intended to check if symlinks should be used, but currently
+ * symlinks cannot be disabled.
* @param conf the jobconf
- * @return true if symlinks are to be created- else return false
- * @deprecated Use {@link JobContext#getSymlink()} instead
+ * @return true
+ * @deprecated symlinks are always created.
*/
@Deprecated
public static boolean getSymlink(Configuration conf){
- String result = conf.get(MRJobConfig.CACHE_SYMLINK);
- if ("yes".equals(result)){
- return true;
- }
- return false;
+ return true;
}
private static boolean[] parseBooleans(String[] strs) {
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java
index fde6aa4647f..f5dbc3a7f1a 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java
@@ -246,8 +246,6 @@ public class ConfigUtil {
new String[] {MRJobConfig.CACHE_FILE_TIMESTAMPS});
Configuration.addDeprecation("mapred.cache.archives.timestamps",
new String[] {MRJobConfig.CACHE_ARCHIVES_TIMESTAMPS});
- Configuration.addDeprecation("mapred.create.symlink",
- new String[] {MRJobConfig.CACHE_SYMLINK});
Configuration.addDeprecation("mapred.working.dir",
new String[] {MRJobConfig.WORKING_DIR});
Configuration.addDeprecation("user.name",
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java
index 02b6e58d0d2..2d5b8ccb096 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRCaching.java
@@ -210,19 +210,10 @@ public class MRCaching {
fs.copyFromLocalFile(tarPath1, cachePath);
fs.copyFromLocalFile(tarPath2, cachePath);
}
-
- public static TestResult launchMRCache(String indir,
- String outdir, String cacheDir,
- JobConf conf, String input)
- throws IOException {
- setupCache(cacheDir, FileSystem.get(conf));
- return launchMRCache(indir,outdir, cacheDir, conf, input, false);
- }
public static TestResult launchMRCache(String indir,
String outdir, String cacheDir,
- JobConf conf, String input,
- boolean withSymlink)
+ JobConf conf, String input)
throws IOException {
String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data","/tmp"))
.toString().replace(' ', '+');
@@ -256,24 +247,13 @@ public class MRCaching {
conf.setNumReduceTasks(1);
conf.setSpeculativeExecution(false);
URI[] uris = new URI[6];
- if (!withSymlink) {
- conf.setMapperClass(MRCaching.MapClass.class);
- uris[0] = fs.getUri().resolve(cacheDir + "/test.txt");
- uris[1] = fs.getUri().resolve(cacheDir + "/test.jar");
- uris[2] = fs.getUri().resolve(cacheDir + "/test.zip");
- uris[3] = fs.getUri().resolve(cacheDir + "/test.tgz");
- uris[4] = fs.getUri().resolve(cacheDir + "/test.tar.gz");
- uris[5] = fs.getUri().resolve(cacheDir + "/test.tar");
- } else {
- DistributedCache.createSymlink(conf);
- conf.setMapperClass(MRCaching.MapClass2.class);
- uris[0] = fs.getUri().resolve(cacheDir + "/test.txt#" + "test.txt");
- uris[1] = fs.getUri().resolve(cacheDir + "/test.jar#" + "testjar");
- uris[2] = fs.getUri().resolve(cacheDir + "/test.zip#" + "testzip");
- uris[3] = fs.getUri().resolve(cacheDir + "/test.tgz#" + "testtgz");
- uris[4] = fs.getUri().resolve(cacheDir + "/test.tar.gz#" + "testtargz");
- uris[5] = fs.getUri().resolve(cacheDir + "/test.tar#" + "testtar");
- }
+ conf.setMapperClass(MRCaching.MapClass2.class);
+ uris[0] = fs.getUri().resolve(cacheDir + "/test.txt");
+ uris[1] = fs.getUri().resolve(cacheDir + "/test.jar");
+ uris[2] = fs.getUri().resolve(cacheDir + "/test.zip");
+ uris[3] = fs.getUri().resolve(cacheDir + "/test.tgz");
+ uris[4] = fs.getUri().resolve(cacheDir + "/test.tar.gz");
+ uris[5] = fs.getUri().resolve(cacheDir + "/test.tar");
DistributedCache.addCacheFile(uris[0], conf);
// Save expected file sizes
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java
index 6e8abd73a06..cda200e97e5 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRDFSCaching.java
@@ -48,7 +48,7 @@ public class TestMiniMRDFSCaching extends TestCase {
"/cachedir",
mr.createJobConf(),
"The quick brown fox\nhas many silly\n"
- + "red fox sox\n", false);
+ + "red fox sox\n");
assertTrue("Archives not matching", ret.isOutputOk);
// launch MR cache with symlinks
ret = MRCaching.launchMRCache("/testing/wc/input",
@@ -56,7 +56,7 @@ public class TestMiniMRDFSCaching extends TestCase {
"/cachedir",
mr.createJobConf(),
"The quick brown fox\nhas many silly\n"
- + "red fox sox\n", true);
+ + "red fox sox\n");
assertTrue("Archives not matching", ret.isOutputOk);
} finally {
if (fileSys != null) {
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
index 59b8689b20d..7b6ecd25560 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestMRJobs.java
@@ -211,6 +211,7 @@ public class TestMRJobs {
Path outputDir =
new Path(mrCluster.getTestWorkDir().getAbsolutePath(), "random-output");
FileOutputFormat.setOutputPath(job, outputDir);
+ job.setSpeculativeExecution(false);
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(RandomTextWriterJob.class);
job.setMaxMapAttempts(1); // speed up failures
@@ -462,7 +463,6 @@ public class TestMRJobs {
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
- job.createSymlink();
job.setMaxMapAttempts(1); // speed up failures
job.submit();
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java
index 90f7cafc632..17d82d6dacc 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/TestSpeculativeExecution.java
@@ -301,7 +301,6 @@ public class TestSpeculativeExecution {
// Creates the Job Configuration
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
- job.createSymlink();
job.setMaxMapAttempts(2);
job.submit();
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java
index 901934bbf98..5184bf0a58a 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/Sort.java
@@ -167,7 +167,6 @@ public class Sort extends Configured implements Tool {
URI partitionUri = new URI(partitionFile.toString() +
"#" + "_sortPartitioning");
DistributedCache.addCacheFile(partitionUri, conf);
- DistributedCache.createSymlink(conf);
}
System.out.println("Running on " +
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java
index 7daa3016c21..09ab4b7e530 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraSort.java
@@ -305,8 +305,7 @@ public class TeraSort extends Configured implements Tool {
LOG.error(e.getMessage());
return -1;
}
- job.addCacheFile(partitionUri);
- job.createSymlink();
+ job.addCacheFile(partitionUri);
long end = System.currentTimeMillis();
System.out.println("Spent " + (end - start) + "ms computing partitions.");
job.setPartitionerClass(TotalOrderPartitioner.class);
diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
index b014cde6225..7076b94547c 100644
--- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
+++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java
@@ -958,7 +958,6 @@ public class StreamJob implements Tool {
if (!b)
fail(LINK_URI);
}
- DistributedCache.createSymlink(jobConf_);
// set the jobconf for the caching parameters
if (cacheArchives != null)
DistributedCache.setCacheArchives(archiveURIs, jobConf_);