MAPREDUCE-6865. Fix typo in javadoc for DistributedCache. Contributed by Attila Sasvari.

(cherry picked from commit 34a931c0fa)
2017-03-20 00:04:23 +05:30 · 2017-03-20 00:04:23 +05:30 · 0cfa6ec6c8
parent 5d7197b2e6
commit 0cfa6ec6c8
2 changed files with 97 additions and 97 deletions
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/filecache/DistributedCache.java
@ -32,79 +32,79 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
 /**
 * Distribute application-specific large, read-only files efficiently.
- * 
+ *
 * <p><code>DistributedCache</code> is a facility provided by the Map-Reduce
 * framework to cache files (text, archives, jars etc.) needed by applications.
 * </p>
- * 
+ *
- * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached 
+ * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached
 * via the {@link org.apache.hadoop.mapred.JobConf}. The
 * <code>DistributedCache</code> assumes that the files specified via urls are
 * already present on the {@link FileSystem} at the path specified by the url
 * and are accessible by every machine in the cluster.</p>
- * 
+ *
- * <p>The framework will copy the necessary files on to the slave node before 
+ * <p>The framework will copy the necessary files on to the worker node before
- * any tasks for the job are executed on that node. Its efficiency stems from 
+ * any tasks for the job are executed on that node. Its efficiency stems from
- * the fact that the files are only copied once per job and the ability to 
+ * the fact that the files are only copied once per job and the ability to
- * cache archives which are un-archived on the slaves.</p> 
+ * cache archives which are un-archived on the workers.</p>
 *
 * <p><code>DistributedCache</code> can be used to distribute simple, read-only
- * data/text files and/or more complex types such as archives, jars etc. 
+ * data/text files and/or more complex types such as archives, jars etc.
- * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. 
+ * Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes.
- * Jars may be optionally added to the classpath of the tasks, a rudimentary 
+ * Jars may be optionally added to the classpath of the tasks, a rudimentary
 * software distribution mechanism.  Files have execution permissions.
 * In older version of Hadoop Map/Reduce users could optionally ask for symlinks
- * to be created in the working directory of the child task.  In the current 
+ * to be created in the working directory of the child task.  In the current
- * version symlinks are always created.  If the URL does not have a fragment 
+ * version symlinks are always created.  If the URL does not have a fragment
- * the name of the file or directory will be used. If multiple files or 
+ * the name of the file or directory will be used. If multiple files or
 * directories map to the same link name, the last one added, will be used.  All
 * others will not even be downloaded.</p>
- * 
+ *
- * <p><code>DistributedCache</code> tracks modification timestamps of the cache 
+ * <p><code>DistributedCache</code> tracks modification timestamps of the cache
- * files. Clearly the cache files should not be modified by the application 
+ * files. Clearly the cache files should not be modified by the application
 * or externally while the job is executing.</p>
- * 
+ *
- * <p>Here is an illustrative example on how to use the 
+ * <p>Here is an illustrative example on how to use the
 * <code>DistributedCache</code>:</p>
 * <p><blockquote><pre>
 *     // Setting up the cache for the application
- *     
+ *
 *     1. Copy the requisite files to the <code>FileSystem</code>:
- *     
+ *
- *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
+ *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
- *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+ *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
 *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
 *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
 *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
 *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
- *     
+ *
 *     2. Setup the application's <code>JobConf</code>:
- *     
+ *
 *     JobConf job = new JobConf();
- *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+ *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
 *                                   job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
 *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
- *     
+ *
 *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
 *     or {@link org.apache.hadoop.mapred.Reducer}:
- *     
+ *
- *     public static class MapClass extends MapReduceBase  
+ *     public static class MapClass extends MapReduceBase
 *     implements Mapper&lt;K, V, K, V&gt; {
- *     
+ *
 *       private Path[] localArchives;
 *       private Path[] localFiles;
- *       
+ *
 *       public void configure(JobConf job) {
 *         // Get the cached archives/files
 *         File f = new File("./map.zip/some/file/in/zip.txt");
 *       }
- *       
+ *
- *       public void map(K key, V value, 
+ *       public void map(K key, V value,
- *                       OutputCollector&lt;K, V&gt; output, Reporter reporter) 
+ *                       OutputCollector&lt;K, V&gt; output, Reporter reporter)
 *       throws IOException {
 *         // Use data from the cached archives/files here
 *         // ...
@ -112,7 +112,7 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
 *         output.collect(k, v);
 *       }
 *     }
- *     
+ *
 * </pre></blockquote>
 *
 * It is also very common to use the DistributedCache by using
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java
@ -34,79 +34,79 @@ import java.net.URI;
 /**
 * Distribute application-specific large, read-only files efficiently.
- * 
+ *
 * <p><code>DistributedCache</code> is a facility provided by the Map-Reduce
 * framework to cache files (text, archives, jars etc.) needed by applications.
 * </p>
- * 
+ *
- * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached 
+ * <p>Applications specify the files, via urls (hdfs:// or http://) to be cached
 * via the {@link org.apache.hadoop.mapred.JobConf}. The
 * <code>DistributedCache</code> assumes that the files specified via urls are
 * already present on the {@link FileSystem} at the path specified by the url
 * and are accessible by every machine in the cluster.</p>
- * 
+ *
- * <p>The framework will copy the necessary files on to the slave node before 
+ * <p>The framework will copy the necessary files on to the worker node before
- * any tasks for the job are executed on that node. Its efficiency stems from 
+ * any tasks for the job are executed on that node. Its efficiency stems from
- * the fact that the files are only copied once per job and the ability to 
+ * the fact that the files are only copied once per job and the ability to
- * cache archives which are un-archived on the slaves.</p> 
+ * cache archives which are un-archived on the workers.</p>
 *
 * <p><code>DistributedCache</code> can be used to distribute simple, read-only
- * data/text files and/or more complex types such as archives, jars etc. 
+ * data/text files and/or more complex types such as archives, jars etc.
- * Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. 
+ * Archives (zip, tar and tgz/tar.gz files) are un-archived at the worker nodes.
- * Jars may be optionally added to the classpath of the tasks, a rudimentary 
+ * Jars may be optionally added to the classpath of the tasks, a rudimentary
 * software distribution mechanism.  Files have execution permissions.
 * In older version of Hadoop Map/Reduce users could optionally ask for symlinks
- * to be created in the working directory of the child task.  In the current 
+ * to be created in the working directory of the child task.  In the current
- * version symlinks are always created.  If the URL does not have a fragment 
+ * version symlinks are always created.  If the URL does not have a fragment
- * the name of the file or directory will be used. If multiple files or 
+ * the name of the file or directory will be used. If multiple files or
 * directories map to the same link name, the last one added, will be used.  All
 * others will not even be downloaded.</p>
- * 
+ *
- * <p><code>DistributedCache</code> tracks modification timestamps of the cache 
+ * <p><code>DistributedCache</code> tracks modification timestamps of the cache
- * files. Clearly the cache files should not be modified by the application 
+ * files. Clearly the cache files should not be modified by the application
 * or externally while the job is executing.</p>
- * 
+ *
- * <p>Here is an illustrative example on how to use the 
+ * <p>Here is an illustrative example on how to use the
 * <code>DistributedCache</code>:</p>
 * <p><blockquote><pre>
 *     // Setting up the cache for the application
- *     
+ *
 *     1. Copy the requisite files to the <code>FileSystem</code>:
- *     
+ *
- *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat  
+ *     $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat
- *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip  
+ *     $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip
 *     $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar
 *     $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar
 *     $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz
 *     $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz
- *     
+ *
 *     2. Setup the application's <code>JobConf</code>:
- *     
+ *
 *     JobConf job = new JobConf();
- *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), 
+ *     DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"),
 *                                   job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job);
 *     DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job);
- *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz", job);
+ *     DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job);
- *     
+ *
 *     3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper}
 *     or {@link org.apache.hadoop.mapred.Reducer}:
- *     
+ *
- *     public static class MapClass extends MapReduceBase  
+ *     public static class MapClass extends MapReduceBase
 *     implements Mapper&lt;K, V, K, V&gt; {
- *     
+ *
 *       private Path[] localArchives;
 *       private Path[] localFiles;
- *       
+ *
 *       public void configure(JobConf job) {
 *         // Get the cached archives/files
 *         File f = new File("./map.zip/some/file/in/zip.txt");
 *       }
- *       
+ *
- *       public void map(K key, V value, 
+ *       public void map(K key, V value,
- *                       OutputCollector&lt;K, V&gt; output, Reporter reporter) 
+ *                       OutputCollector&lt;K, V&gt; output, Reporter reporter)
 *       throws IOException {
 *         // Use data from the cached archives/files here
 *         // ...
@ -114,7 +114,7 @@ import java.net.URI;
 *         output.collect(k, v);
 *       }
 *     }
- *     
+ *
 * </pre></blockquote>
 *
 * It is also very common to use the DistributedCache by using
@ -281,7 +281,7 @@ public class DistributedCache {
    conf.set(MRJobConfig.CACHE_ARCHIVES, archives == null ? uri.toString()
             : archives + "," + uri.toString());
  }
-  
+
  /**
   * Add a file to be localized to the conf.  The localized file will be
   * downloaded to the execution node(s), and a link will created to the
@ -370,7 +370,7 @@ public class DistributedCache {
  /**
   * Get the file entries in classpath as an array of Path.
   * Used by internal DistributedCache code.
-   * 
+   *
   * @param conf Configuration that contains the classpath setting
   * @deprecated Use {@link JobContext#getFileClassPaths()} instead
   * @see JobContext#getFileClassPaths()
@ -379,8 +379,8 @@ public class DistributedCache {
  public static Path[] getFileClassPaths(Configuration conf) {
    ArrayList<String> list = (ArrayList<String>)conf.getStringCollection(
                                MRJobConfig.CLASSPATH_FILES);
-    if (list.size() == 0) { 
+    if (list.size() == 0) {
-      return null; 
+      return null;
    }
    Path[] paths = new Path[list.size()];
    for (int i = 0; i < list.size(); i++) {
@ -392,7 +392,7 @@ public class DistributedCache {
  /**
   * Add an archive path to the current set of classpath entries. It adds the
   * archive to cache as well.  Intended to be used by user code.
-   * 
+   *
   * @param archive Path of the archive to be added
   * @param conf Configuration that contains the classpath setting
   * @deprecated Use {@link Job#addArchiveToClassPath(Path)} instead
@ -426,7 +426,7 @@ public class DistributedCache {
  /**
   * Get the archive entries in classpath as an array of Path.
   * Used by internal DistributedCache code.
-   * 
+   *
   * @param conf Configuration that contains the classpath setting
   * @deprecated Use {@link JobContext#getArchiveClassPaths()} instead 
   * @see JobContext#getArchiveClassPaths()
@ -435,8 +435,8 @@ public class DistributedCache {
  public static Path[] getArchiveClassPaths(Configuration conf) {
    ArrayList<String> list = (ArrayList<String>)conf.getStringCollection(
                                MRJobConfig.CLASSPATH_ARCHIVES);
-    if (list.size() == 0) { 
+    if (list.size() == 0) {
-      return null; 
+      return null;
    }
    Path[] paths = new Path[list.size()];
    for (int i = 0; i < list.size(); i++) {
@ -449,13 +449,13 @@ public class DistributedCache {
   * Originally intended to enable symlinks, but currently symlinks cannot be
   * disabled. This is a NO-OP.
   * @param conf the jobconf
-   * @deprecated This is a NO-OP. 
+   * @deprecated This is a NO-OP.
   */
  @Deprecated
  public static void createSymlink(Configuration conf){
    //NOOP
  }
-  
+
  /**
   * Originally intended to check if symlinks should be used, but currently
   * symlinks cannot be disabled.
@ -480,29 +480,29 @@ public class DistributedCache {
  }
  /**
-   * Get the booleans on whether the files are public or not.  Used by 
+   * Get the booleans on whether the files are public or not.  Used by
   * internal DistributedCache and MapReduce code.
   * @param conf The configuration which stored the timestamps
-   * @return a string array of booleans 
+   * @return a string array of booleans
   */
  public static boolean[] getFileVisibilities(Configuration conf) {
    return parseBooleans(conf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES));
  }
  /**
-   * Get the booleans on whether the archives are public or not.  Used by 
+   * Get the booleans on whether the archives are public or not.  Used by
   * internal DistributedCache and MapReduce code.
   * @param conf The configuration which stored the timestamps
-   * @return a string array of booleans 
+   * @return a string array of booleans
   */
  public static boolean[] getArchiveVisibilities(Configuration conf) {
    return parseBooleans(conf.getStrings(MRJobConfig.CACHE_ARCHIVES_VISIBILITIES));
  }
  /**
-   * This method checks if there is a conflict in the fragment names 
+   * This method checks if there is a conflict in the fragment names
-   * of the uris. Also makes sure that each uri has a fragment. It 
+   * of the uris. Also makes sure that each uri has a fragment. It
-   * is only to be called if you want to create symlinks for 
+   * is only to be called if you want to create symlinks for
   * the various archives and files.  May be used by user code.
   * @param uriFiles The uri array of urifiles
   * @param uriArchives the uri array of uri archives
@ -514,7 +514,7 @@ public class DistributedCache {
    // check if fragment is null for any uri
    // also check if there are any conflicts in fragment names
    Set<String> fragments = new HashSet<String>();
-    
+
    // iterate over file uris
    if (uriFiles != null) {
      for (int i = 0; i < uriFiles.length; i++) {
@ -529,7 +529,7 @@ public class DistributedCache {
        fragments.add(lowerCaseFragment);
      }
    }
-    
+
    // iterate over archive uris
    if (uriArchives != null) {
      for (int i = 0; i < uriArchives.length; i++) {