From 6cb1cf3d1a23c2dd68c3aef975429a4451ff04b4 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 30 May 2013 01:24:15 +0000 Subject: [PATCH] MAPREDUCE-5263. Bring back old methods and fields in filecache.DistributedCache for binary compatibility with mapred in 1.x. Contributed by Zhijie Shen. svn merge --ignore-ancestry -c 1487695 ../../trunk/ git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1487696 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 4 + .../mapred/TestMRWithDistributedCache.java | 94 ++++++++ .../hadoop/filecache/DistributedCache.java | 204 +++++++++++++++++- .../hadoop/mapreduce/util/ConfigUtil.java | 5 + 4 files changed, 305 insertions(+), 2 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index a0fce4b780b..4bca7fe849a 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -104,6 +104,10 @@ Release 2.0.5-beta - UNRELEASED MAPREDUCE-5270. Migrated MR app from using BuilderUtil factory methods to individual record factory methods. (Jian He via vinodkv) + MAPREDUCE-5263. Bring back old methods and fields in + filecache.DistributedCache for binary compatibility with mapred in 1.x. + (Zhijie Shen via vinodkv) + OPTIMIZATIONS MAPREDUCE-4974. Optimising the LineRecordReader initialize() method diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java index b704c77c96a..065088d4fae 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestMRWithDistributedCache.java @@ -27,12 +27,15 @@ import java.util.Arrays; import java.util.jar.JarOutputStream; import java.util.zip.ZipEntry; +import junit.framework.Assert; import junit.framework.TestCase; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.filecache.DistributedCache; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalFileSystem; import org.apache.hadoop.fs.Path; @@ -44,6 +47,7 @@ import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; +import org.junit.Test; /** * Tests the use of the @@ -59,6 +63,7 @@ import org.apache.hadoop.mapreduce.server.jobtracker.JTConfig; * * This test is not fast: it uses MiniMRCluster. */ +@SuppressWarnings("deprecation") public class TestMRWithDistributedCache extends TestCase { private static Path TEST_ROOT_DIR = new Path(System.getProperty("test.build.data","/tmp")); @@ -187,4 +192,93 @@ public class TestMRWithDistributedCache extends TestCase { jos.close(); return p; } + + @Test (timeout = 1000) + public void testDeprecatedFunctions() throws Exception { + DistributedCache.addLocalArchives(conf, "Test Local Archives 1"); + Assert.assertEquals("Test Local Archives 1", + conf.get(DistributedCache.CACHE_LOCALARCHIVES)); + Assert.assertEquals(1, + DistributedCache.getLocalCacheArchives(conf).length); + Assert.assertEquals("Test Local Archives 1", + DistributedCache.getLocalCacheArchives(conf)[0].getName()); + DistributedCache.addLocalArchives(conf, "Test Local Archives 2"); + Assert.assertEquals("Test Local Archives 1,Test Local Archives 2", + conf.get(DistributedCache.CACHE_LOCALARCHIVES)); + Assert.assertEquals(2, + DistributedCache.getLocalCacheArchives(conf).length); + Assert.assertEquals("Test Local Archives 2", + DistributedCache.getLocalCacheArchives(conf)[1].getName()); + DistributedCache.setLocalArchives(conf, "Test Local Archives 3"); + Assert.assertEquals("Test Local Archives 3", + conf.get(DistributedCache.CACHE_LOCALARCHIVES)); + Assert.assertEquals(1, + DistributedCache.getLocalCacheArchives(conf).length); + Assert.assertEquals("Test Local Archives 3", + DistributedCache.getLocalCacheArchives(conf)[0].getName()); + + DistributedCache.addLocalFiles(conf, "Test Local Files 1"); + Assert.assertEquals("Test Local Files 1", + conf.get(DistributedCache.CACHE_LOCALFILES)); + Assert.assertEquals(1, + DistributedCache.getLocalCacheFiles(conf).length); + Assert.assertEquals("Test Local Files 1", + DistributedCache.getLocalCacheFiles(conf)[0].getName()); + DistributedCache.addLocalFiles(conf, "Test Local Files 2"); + Assert.assertEquals("Test Local Files 1,Test Local Files 2", + conf.get(DistributedCache.CACHE_LOCALFILES)); + Assert.assertEquals(2, + DistributedCache.getLocalCacheFiles(conf).length); + Assert.assertEquals("Test Local Files 2", + DistributedCache.getLocalCacheFiles(conf)[1].getName()); + DistributedCache.setLocalFiles(conf, "Test Local Files 3"); + Assert.assertEquals("Test Local Files 3", + conf.get(DistributedCache.CACHE_LOCALFILES)); + Assert.assertEquals(1, + DistributedCache.getLocalCacheFiles(conf).length); + Assert.assertEquals("Test Local Files 3", + DistributedCache.getLocalCacheFiles(conf)[0].getName()); + + DistributedCache.setArchiveTimestamps(conf, "1234567890"); + Assert.assertEquals(1234567890, + conf.getLong(DistributedCache.CACHE_ARCHIVES_TIMESTAMPS, 0)); + Assert.assertEquals(1, + DistributedCache.getArchiveTimestamps(conf).length); + Assert.assertEquals(1234567890, + Long.parseLong(DistributedCache.getArchiveTimestamps(conf)[0])); + DistributedCache.setFileTimestamps(conf, "1234567890"); + Assert.assertEquals(1234567890, + conf.getLong(DistributedCache.CACHE_FILES_TIMESTAMPS, 0)); + Assert.assertEquals(1, + DistributedCache.getFileTimestamps(conf).length); + Assert.assertEquals(1234567890, + Long.parseLong(DistributedCache.getFileTimestamps(conf)[0])); + + DistributedCache.createAllSymlink(conf, new File("Test Job Cache Dir"), + new File("Test Work Dir")); + Assert.assertNull(conf.get(DistributedCache.CACHE_SYMLINK)); + Assert.assertTrue(DistributedCache.getSymlink(conf)); + + Assert.assertTrue(symlinkFile.createNewFile()); + FileStatus fileStatus = + DistributedCache.getFileStatus(conf, symlinkFile.toURI()); + Assert.assertNotNull(fileStatus); + Assert.assertEquals(fileStatus.getModificationTime(), + DistributedCache.getTimestamp(conf, symlinkFile.toURI())); + Assert.assertTrue(symlinkFile.delete()); + + DistributedCache.addCacheArchive(symlinkFile.toURI(), conf); + Assert.assertEquals(symlinkFile.toURI().toString(), + conf.get(DistributedCache.CACHE_ARCHIVES)); + Assert.assertEquals(1, DistributedCache.getCacheArchives(conf).length); + Assert.assertEquals(symlinkFile.toURI(), + DistributedCache.getCacheArchives(conf)[0]); + + DistributedCache.addCacheFile(symlinkFile.toURI(), conf); + Assert.assertEquals(symlinkFile.toURI().toString(), + conf.get(DistributedCache.CACHE_FILES)); + Assert.assertEquals(1, DistributedCache.getCacheFiles(conf).length); + Assert.assertEquals(symlinkFile.toURI(), + DistributedCache.getCacheFiles(conf)[0]); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java index 0e7438fa4e6..370d67deb64 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java @@ -18,12 +18,17 @@ package org.apache.hadoop.filecache; +import java.io.File; +import java.io.IOException; +import java.net.URI; + import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.MRJobConfig; /** * Distribute application-specific large, read-only files efficiently. @@ -123,9 +128,204 @@ import org.apache.hadoop.mapreduce.Job; * @see org.apache.hadoop.mapred.JobClient * @see org.apache.hadoop.mapreduce.Job */ +@SuppressWarnings("deprecation") @InterfaceAudience.Public @InterfaceStability.Stable +@Deprecated public class DistributedCache extends org.apache.hadoop.mapreduce.filecache.DistributedCache { - // + /** + * Warning: {@link #CACHE_FILES_SIZES} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_FILES_SIZES} + */ + @Deprecated + public static final String CACHE_FILES_SIZES = + "mapred.cache.files.filesizes"; + + /** + * Warning: {@link #CACHE_ARCHIVES_SIZES} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_ARCHIVES_SIZES} + */ + @Deprecated + public static final String CACHE_ARCHIVES_SIZES = + "mapred.cache.archives.filesizes"; + + /** + * Warning: {@link #CACHE_ARCHIVES_TIMESTAMPS} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS} + */ + @Deprecated + public static final String CACHE_ARCHIVES_TIMESTAMPS = + "mapred.cache.archives.timestamps"; + + /** + * Warning: {@link #CACHE_FILES_TIMESTAMPS} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_FILE_TIMESTAMPS} + */ + @Deprecated + public static final String CACHE_FILES_TIMESTAMPS = + "mapred.cache.files.timestamps"; + + /** + * Warning: {@link #CACHE_ARCHIVES} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_ARCHIVES} + */ + @Deprecated + public static final String CACHE_ARCHIVES = "mapred.cache.archives"; + + /** + * Warning: {@link #CACHE_FILES} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_FILES} + */ + @Deprecated + public static final String CACHE_FILES = "mapred.cache.files"; + + /** + * Warning: {@link #CACHE_LOCALARCHIVES} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_LOCALARCHIVES} + */ + @Deprecated + public static final String CACHE_LOCALARCHIVES = + "mapred.cache.localArchives"; + + /** + * Warning: {@link #CACHE_LOCALFILES} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_LOCALFILES} + */ + @Deprecated + public static final String CACHE_LOCALFILES = "mapred.cache.localFiles"; + + /** + * Warning: {@link #CACHE_SYMLINK} is not a *public* constant. + * The variable is kept for M/R 1.x applications, M/R 2.x applications should + * use {@link MRJobConfig#CACHE_SYMLINK} + */ + @Deprecated + public static final String CACHE_SYMLINK = "mapred.create.symlink"; + + /** + * Add a archive that has been localized to the conf. Used + * by internal DistributedCache code. + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local archives + */ + @Deprecated + public static void addLocalArchives(Configuration conf, String str) { + String archives = conf.get(CACHE_LOCALARCHIVES); + conf.set(CACHE_LOCALARCHIVES, archives == null ? str + : archives + "," + str); + } + + /** + * Add a file that has been localized to the conf.. Used + * by internal DistributedCache code. + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local files + */ + @Deprecated + public static void addLocalFiles(Configuration conf, String str) { + String files = conf.get(CACHE_LOCALFILES); + conf.set(CACHE_LOCALFILES, files == null ? str + : files + "," + str); + } + + /** + * This method create symlinks for all files in a given dir in another + * directory. Currently symlinks cannot be disabled. This is a NO-OP. + * + * @param conf the configuration + * @param jobCacheDir the target directory for creating symlinks + * @param workDir the directory in which the symlinks are created + * @throws IOException + * @deprecated Internal to MapReduce framework. Use DistributedCacheManager + * instead. + */ + @Deprecated + public static void createAllSymlink( + Configuration conf, File jobCacheDir, File workDir) + throws IOException{ + // Do nothing + } + + /** + * Returns {@link FileStatus} of a given cache file on hdfs. Internal to + * MapReduce. + * @param conf configuration + * @param cache cache file + * @return FileStatus of a given cache file on hdfs + * @throws IOException + */ + @Deprecated + public static FileStatus getFileStatus(Configuration conf, URI cache) + throws IOException { + FileSystem fileSystem = FileSystem.get(cache, conf); + return fileSystem.getFileStatus(new Path(cache.getPath())); + } + + /** + * Returns mtime of a given cache file on hdfs. Internal to MapReduce. + * @param conf configuration + * @param cache cache file + * @return mtime of a given cache file on hdfs + * @throws IOException + */ + @Deprecated + public static long getTimestamp(Configuration conf, URI cache) + throws IOException { + return getFileStatus(conf, cache).getModificationTime(); + } + + /** + * This is to check the timestamp of the archives to be localized. + * Used by internal MapReduce code. + * @param conf Configuration which stores the timestamp's + * @param timestamps comma separated list of timestamps of archives. + * The order should be the same as the order in which the archives are added. + */ + @Deprecated + public static void setArchiveTimestamps(Configuration conf, String timestamps) { + conf.set(CACHE_ARCHIVES_TIMESTAMPS, timestamps); + } + + /** + * This is to check the timestamp of the files to be localized. + * Used by internal MapReduce code. + * @param conf Configuration which stores the timestamp's + * @param timestamps comma separated list of timestamps of files. + * The order should be the same as the order in which the files are added. + */ + @Deprecated + public static void setFileTimestamps(Configuration conf, String timestamps) { + conf.set(CACHE_FILES_TIMESTAMPS, timestamps); + } + + /** + * Set the conf to contain the location for localized archives. Used + * by internal DistributedCache code. + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local archives + */ + @Deprecated + public static void setLocalArchives(Configuration conf, String str) { + conf.set(CACHE_LOCALARCHIVES, str); + } + + /** + * Set the conf to contain the location for localized files. Used + * by internal DistributedCache code. + * @param conf The conf to modify to contain the localized caches + * @param str a comma separated list of local files + */ + @Deprecated + public static void setLocalFiles(Configuration conf, String str) { + conf.set(CACHE_LOCALFILES, str); + } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java index e6ec306d831..be25a8f3286 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java @@ -47,6 +47,7 @@ public class ConfigUtil { /** * Adds deprecated keys and the corresponding new keys to the Configuration */ + @SuppressWarnings("deprecation") private static void addDeprecatedKeys() { Configuration.addDeprecation("mapred.temp.dir", new String[] {MRConfig.TEMP_DIR}); @@ -242,6 +243,10 @@ public class ConfigUtil { new String[] {MRJobConfig.CACHE_LOCALFILES}); Configuration.addDeprecation("mapred.cache.localArchives", new String[] {MRJobConfig.CACHE_LOCALARCHIVES}); + Configuration.addDeprecation("mapred.cache.files.filesizes", + new String[] {MRJobConfig.CACHE_FILES_SIZES}); + Configuration.addDeprecation("mapred.cache.archives.filesizes", + new String[] {MRJobConfig.CACHE_ARCHIVES_SIZES}); Configuration.addDeprecation("mapred.cache.files.timestamps", new String[] {MRJobConfig.CACHE_FILE_TIMESTAMPS}); Configuration.addDeprecation("mapred.cache.archives.timestamps",