diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index 5fd7d30cea2..5524b14d213 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -181,6 +181,9 @@ Trunk (Unreleased)
MAPREDUCE-6234. TestHighRamJob fails due to the change in MAPREDUCE-5785.
(Masatake Iwasaki via kasha)
+ MAPREDUCE-5657. Fix Javadoc errors caused by incorrect or illegal tags in doc
+ comments. (Akira AJISAKA via ozawa)
+
BREAKDOWN OF MAPREDUCE-2841 (NATIVE TASK) SUBTASKS
MAPREDUCE-5985. native-task: Fix build on macosx. Contributed by
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
index 5f39edd72e8..c8f2427fcd0 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java
@@ -174,7 +174,7 @@ public class TaskAttemptListenerImpl extends CompositeService
/**
* Child checking whether it can commit.
*
- *
+ *
* Commit is a two-phased protocol. First the attempt informs the
* ApplicationMaster that it is
* {@link #commitPending(TaskAttemptID, TaskStatus)}. Then it repeatedly polls
@@ -208,7 +208,7 @@ public class TaskAttemptListenerImpl extends CompositeService
* TaskAttempt is reporting that it is in commit_pending and it is waiting for
* the commit Response
*
- *
+ *
* Commit it a two-phased protocol. First the attempt informs the
* ApplicationMaster that it is
* {@link #commitPending(TaskAttemptID, TaskStatus)}. Then it repeatedly polls
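The doc comment above describes the two-phase commit only in prose. The following is a minimal, hypothetical sketch of how a child task could drive it, assuming a TaskUmbilicalProtocol proxy and an OutputCommitter are in scope; the poll interval and error handling are illustrative, not taken from this patch:

    // Hedged sketch, not part of this patch.
    void commitInTwoPhases(TaskUmbilicalProtocol umbilical, TaskAttemptID taskId,
        TaskStatus status, OutputCommitter committer, TaskAttemptContext context)
        throws IOException, InterruptedException {
      umbilical.commitPending(taskId, status);   // phase 1: report commit_pending to the AM
      while (!umbilical.canCommit(taskId)) {     // phase 2: poll until the AM approves the commit
        Thread.sleep(1000);                      // assumed back-off between polls
      }
      committer.commitTask(context);             // only now is the task output promoted
    }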
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java
index 981e6ffb4b4..05bb40bb9d9 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/JobEndNotifier.java
@@ -44,7 +44,6 @@ import org.mortbay.log.Log;
* proxy if needed
JobTracker
.
@@ -56,7 +56,7 @@ import org.apache.hadoop.util.StringInterner;
* Clients can query for the latest ClusterStatus, via
* {@link JobClient#getClusterStatus()}.
Note: The following is valid only if the {@link OutputCommitter}
* is {@link FileOutputCommitter}. If OutputCommitter is not
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java
index 30ebd6b8ca3..32e07e7b9fb 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/IFile.java
@@ -47,7 +47,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
- * IFile is the simple <key-len, value-len, key, value> format
+ * IFile is the simple &lt;key-len, value-len, key, value&gt; format
* for the intermediate map-outputs in Map-Reduce.
*
* There is a Writer to write out map-outputs in this format and
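The IFile hunk above is representative of the most common fix in this patch: a literal <...> sequence inside a doc comment is parsed as an HTML tag and breaks javadoc generation (JDK 8's doclint rejects it), so the text is escaped. A hedged before/after sketch of the pattern, with the surrounding comment abbreviated:

    // Before: the raw angle brackets are read as an unknown HTML tag.
    /**
     * IFile is the simple <key-len, value-len, key, value> format ...
     */

    // After: the brackets are escaped; {@code <key-len, value-len, key, value>} would also work.
    /**
     * IFile is the simple &lt;key-len, value-len, key, value&gt; format ...
     */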
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java
index 37633ab504a..0dbbe5a86c3 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobACLsManager.java
@@ -101,7 +101,6 @@ public class JobACLsManager {
* @param jobOperation
* @param jobOwner
* @param jobACL
- * @throws AccessControlException
*/
public boolean checkAccess(UserGroupInformation callerUGI,
JobACL jobOperation, String jobOwner, AccessControlList jobACL) {
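The hunk above deletes the @throws tag rather than rewording it: checkAccess declares no checked exception, and a @throws entry naming an exception the method cannot throw is what stricter javadoc runs (e.g. JDK 8 doclint's "exception not thrown" error) complain about. A hedged sketch of the resulting shape, with the description paraphrased rather than quoted from the source:

    /**
     * Check whether the caller is allowed to perform the given job operation.
     * @param callerUGI the user attempting the operation
     * @param jobOperation the operation being checked
     * @param jobOwner the owner of the job
     * @param jobACL the access control list for the job
     * @return true if access is allowed, false otherwise
     */
    public boolean checkAccess(UserGroupInformation callerUGI,
        JobACL jobOperation, String jobOwner, AccessControlList jobACL) {
      // no checked exception is declared, so no @throws tag belongs here
      return false; // placeholder body for the sketch
    }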
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java
index 89a966eb84c..e91fbfed37c 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobClient.java
@@ -79,7 +79,7 @@ import org.apache.hadoop.util.ToolRunner;
* Submitting the job to the cluster and optionally monitoring
* it's status.
*
- *
JobClient
to submit
@@ -101,9 +101,9 @@ import org.apache.hadoop.util.ToolRunner;
*
* // Submit the job, then poll for progress until the job is complete
* JobClient.runJob(job);
- *
+ *
*
- * At times clients would chain map-reduce jobs to accomplish complex tasks
* which cannot be done via a single map-reduce job. This is fairly easy since
@@ -127,7 +127,7 @@ import org.apache.hadoop.util.ToolRunner;
* {@link JobConf#setJobEndNotificationURI(String)} : setup a notification
* on job-completion, thus avoiding polling.
*
- *
+ *
*
* @see JobConf
* @see ClusterStatus
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java
index 315c8299872..c388bda4953 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java
@@ -74,7 +74,7 @@ import org.apache.log4j.Level;
* more complex for the user to control finely
* (e.g. {@link #setNumMapTasks(int)}).
*
- *
+ *
* JobConf
typically specifies the {@link Mapper}, combiner
* (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
@@ -105,7 +105,7 @@ import org.apache.log4j.Level;
*
* job.setInputFormat(SequenceFileInputFormat.class);
* job.setOutputFormat(SequenceFileOutputFormat.class);
- *
* If the parameter {@code loadDefaults} is false, the new instance * will not load resources from the default files. * @@ -993,19 +993,19 @@ public class JobConf extends Configuration { /** * Set the user defined {@link RawComparator} comparator for * grouping keys in the input to the combiner. - *
+ * *This comparator should be provided if the equivalence rules for keys * for sorting the intermediates are different from those for grouping keys * before each call to * {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.
- * + * *For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed * in a single call to the reduce function if K1 and K2 compare as equal.
- * + * *Since {@link #setOutputKeyComparatorClass(Class)} can be used to control * how keys are sorted, this can be used in conjunction to simulate * secondary sort on values.
- * + * *Note: This is not a guarantee of the combiner sort being * stable in any sense. (In any case, with the order of available * map-outputs to the combiner being non-deterministic, it wouldn't make @@ -1210,7 +1210,7 @@ public class JobConf extends Configuration { *
Typically the combiner is same as the Reducer
for the
* job i.e. {@link #setReducerClass(Class)}.
The number of maps is usually driven by the total size of the inputs * i.e. total number of blocks of the input files.
@@ -1350,7 +1350,7 @@ public class JobConf extends Configuration { /** * Set the requisite number of reduce tasks for this job. * - *The right number of reduces seems to be 0.95
or
* 1.75
multiplied by (<no. of nodes> *
@@ -1370,7 +1370,7 @@ public class JobConf extends Configuration {
* reserve a few reduce slots in the framework for speculative-tasks, failures
* etc.
It is legal to set the number of reduce-tasks to zero
.
The debug command, run on the node where the map failed, is:
- *+ * * *+ ** $script $stdout $stderr $syslog $jobconf. - *
The script file is distributed through {@link DistributedCache} * APIs. The script needs to be symlinked.
@@ -1705,7 +1705,7 @@ public class JobConf extends Configuration { * job.setMapDebugScript("./myscript"); * DistributedCache.createSymlink(job); * DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); - * + * * * @param mDbgScript the script name */ @@ -1730,9 +1730,9 @@ public class JobConf extends Configuration { * is given task's stdout, stderr, syslog, jobconf files as arguments. * *The debug command, run on the node where the map failed, is:
- *+ * * *+ ** $script $stdout $stderr $syslog $jobconf. - *
The script file is distributed through {@link DistributedCache} * APIs. The script file needs to be symlinked
@@ -1742,7 +1742,7 @@ public class JobConf extends Configuration { * job.setReduceDebugScript("./myscript"); * DistributedCache.createSymlink(job); * DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); - * + * * * @param rDbgScript the script name */ @@ -1785,8 +1785,6 @@ public class JobConf extends Configuration { * * @param uri the job end notification uri * @see JobStatus - * @see Job Completion and Chaining */ public void setJobEndNotificationURI(String uri) { set(JobContext.MR_JOB_END_NOTIFICATION_URL, uri); @@ -1816,7 +1814,7 @@ public class JobConf extends Configuration { * * If a value is specified in the configuration, it is returned. * Else, it returns {@link JobContext#DEFAULT_MAP_MEMORY_MB}. - * + ** For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used @@ -1842,7 +1840,7 @@ public class JobConf extends Configuration { * * If a value is specified in the configuration, it is returned. * Else, it returns {@link JobContext#DEFAULT_REDUCE_MEMORY_MB}. - *
+ ** For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different * from {@link #DISABLED_MEMORY_LIMIT}, that value will be used @@ -1915,7 +1913,6 @@ public class JobConf extends Configuration { * * @param my_class the class to find. * @return a jar file that contains the class, or null. - * @throws IOException */ public static String findContainingJar(Class my_class) { return ClassUtil.findContainingJar(my_class); @@ -1924,10 +1921,10 @@ public class JobConf extends Configuration { /** * Get the memory required to run a task of this job, in bytes. See * {@link #MAPRED_TASK_MAXVMEM_PROPERTY} - *
+ ** This method is deprecated. Now, different memory limits can be * set for map and reduce tasks of a job, in MB. - *
+ ** For backward compatibility, if the job configuration sets the * key {@link #MAPRED_TASK_MAXVMEM_PROPERTY}, that value is returned. * Otherwise, this method will return the larger of the values returned by @@ -1953,7 +1950,7 @@ public class JobConf extends Configuration { /** * Set the maximum amount of memory any task of this job can use. See * {@link #MAPRED_TASK_MAXVMEM_PROPERTY} - *
+ *
* mapred.task.maxvmem is split into
* mapreduce.map.memory.mb
* and mapreduce.map.memory.mb,mapred
@@ -2073,7 +2070,7 @@ public class JobConf extends Configuration {
/**
* Parse the Maximum heap size from the java opts as specified by the -Xmx option
- * Format: -Xmx
Applications may write a custom {@link MapRunnable} to exert greater
* control on map processing e.g. multi-threaded Mapper
s etc.
* The Map/Reduce framework can be configured with one or more queues, * depending on the scheduler it is configured with. While some * schedulers work only with one queue, some schedulers support multiple * queues. Some schedulers also support the notion of queues within * queues - a feature called hierarchical queues. - *
+ ** Queue names are unique, and used as a key to lookup queues. Hierarchical * queues are named by a 'fully qualified name' such as q1:q2:q3, where * q2 is a child queue of q1 and q3 is a child queue of q2. - *
+ ** Leaf level queues are queues that contain no queues within them. Jobs * can be submitted only to leaf level queues. - *
+ ** Queues can be configured with various properties. Some of these * properties are common to all schedulers, and those are handled by this * class. Schedulers might also associate several custom properties with @@ -69,11 +69,11 @@ import java.net.URL; * provided by the framework, but define their own mechanisms. In such cases, * it is likely that the name of the queue will be used to relate the * common properties of a queue with scheduler specific properties. - *
+ ** Information related to a queue, such as its name, properties, scheduling * information and children are exposed by this class via a serializable * class called {@link JobQueueInfo}. - *
+ ** Queues are configured in the configuration file mapred-queues.xml. * To support backwards compatibility, queues can also be configured * in mapred-site.xml. However, when configured in the latter, there is @@ -102,7 +102,7 @@ public class QueueManager { /** * Factory method to create an appropriate instance of a queue * configuration parser. - *
+ ** Returns a parser that can parse either the deprecated property * style queue configuration in mapred-site.xml, or one that can * parse hierarchical queues in mapred-queues.xml. First preference @@ -157,7 +157,7 @@ public class QueueManager { /** * Construct a new QueueManager using configuration specified in the passed * in {@link org.apache.hadoop.conf.Configuration} object. - *
+ ** This instance supports queue configuration specified in mapred-site.xml, * but without support for hierarchical queues. If no queue configuration * is found in mapred-site.xml, it will then look for site configuration @@ -173,7 +173,7 @@ public class QueueManager { /** * Create an instance that supports hierarchical queues, defined in * the passed in configuration file. - *
+ ** This is mainly used for testing purposes and should not called from * production code. * @@ -208,7 +208,7 @@ public class QueueManager { /** * Return the set of leaf level queues configured in the system to * which jobs are submitted. - *
+ ** The number of queues configured should be dependent on the Scheduler * configured. Note that some schedulers work with only one queue, whereas * others can support multiple queues. @@ -222,7 +222,7 @@ public class QueueManager { /** * Return true if the given user is part of the ACL for the given * {@link QueueACL} name for the given queue. - *
+ ** An operation is allowed if all users are provided access for this * operation, or if either the user or any of the groups specified is * provided access. @@ -283,7 +283,7 @@ public class QueueManager { /** * Set a generic Object that represents scheduling information relevant * to a queue. - *
+ ** A string representation of this Object will be used by the framework * to display in user facing applications like the JobTracker web UI and * the hadoop CLI. @@ -323,7 +323,7 @@ public class QueueManager { /** * Refresh acls, state and scheduler properties for the configured queues. - *
+ ** This method reloads configuration related to queues, but does not * support changes to the list of queues or hierarchy. The expected usage * is that an administrator can modify the queue configuration file and @@ -431,7 +431,7 @@ public class QueueManager { /** * JobQueueInfo for all the queues. - *
+ ** Contribs can use this data structure to either create a hierarchy or for * traversing. * They can also use this to refresh properties in case of refreshQueues @@ -450,7 +450,7 @@ public class QueueManager { /** * Generates the array of QueueAclsInfo object. - *
+ ** The array consists of only those queues for which user has acls. * * @return QueueAclsInfo[] diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java index 0c95a147dc6..6e2c89fd8ca 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/RecordReader.java @@ -29,7 +29,7 @@ import org.apache.hadoop.classification.InterfaceStability; * *
RecordReader, typically, converts the byte-oriented view of
* the input, provided by the InputSplit, and presents a
- * record-oriented view for the {@link Mapper} & {@link Reducer} tasks for
+ * record-oriented view for the {@link Mapper} and {@link Reducer} tasks for
* processing. It thus assumes the responsibility of processing record
* boundaries and presenting the tasks with keys and values.
Reducer is input the grouped output of a {@link Mapper}.
* In the phase the framework, for each Reducer, fetches the
@@ -51,7 +51,7 @@ import org.apache.hadoop.io.Closeable;
*
The framework groups Reducer inputs by keys
* (since different Mappers may have output the same key) in this
@@ -60,7 +60,7 @@ import org.apache.hadoop.io.Closeable;
*
The shuffle and sort phases occur simultaneously i.e. while outputs are
* being fetched they are merged.
* - *If equivalence rules for keys while grouping the intermediates are * different from those for grouping keys before reduction, then one may @@ -86,11 +86,11 @@ import org.apache.hadoop.io.Closeable; *
In this phase the
* {@link #reduce(Object, Iterator, OutputCollector, Reporter)}
- * method is called for each <key, (list of values)>
pair in
+ * method is called for each <key, (list of values)>
pair in
* the grouped inputs.
The output of the reduce task is typically written to the
* {@link FileSystem} via
@@ -156,7 +156,7 @@ import org.apache.hadoop.io.Closeable;
* }
* }
* }
- *
+ *
*
* @see Mapper
* @see Partitioner
@@ -171,7 +171,7 @@ public interface Reducer
The framework calls this method for each
- * <key, (list of values)>
pair in the grouped inputs.
+ * <key, (list of values)>
pair in the grouped inputs.
* Output values must be of the same type as input values. Input keys must
* not be altered. The framework will reuse the key and value objects
* that are passed into the reduce, therefore the application should clone
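The contract spelled out above (one reduce call per <key, (list of values)> group, input keys must not be altered, and the framework may reuse the key/value objects it passes in) is easier to see in code. A minimal, hypothetical old-API reducer that sums integer values per key:

    // Hedged illustration only; the class name and types are assumptions, not part of the patch.
    public class SumReducer extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {
      public void reduce(Text key, Iterator<IntWritable> values,
          OutputCollector<Text, IntWritable> output, Reporter reporter)
          throws IOException {
        int sum = 0;
        while (values.hasNext()) {
          sum += values.next().get();   // copy the primitive: the IntWritable may be reused
        }
        output.collect(key, new IntWritable(sum));  // the key object is emitted unmodified
      }
    }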
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
index 5df02c7b5b1..c3678d67065 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskUmbilicalProtocol.java
@@ -178,7 +178,6 @@ public interface TaskUmbilicalProtocol extends VersionedProtocol {
*
* @param taskID task's id
* @return the most recent checkpoint (if any) for this task
- * @throws IOException
*/
TaskCheckpointID getCheckpointID(TaskID taskID);
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java
index 14f040af96a..723a234d301 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainMapper.java
@@ -29,61 +29,61 @@ import java.io.IOException;
/**
* The ChainMapper class allows to use multiple Mapper classes within a single
* Map task.
- *
* The Mapper classes are invoked in a chained (or piped) fashion, the output of * the first becomes the input of the second, and so on until the last Mapper, * the output of the last Mapper will be written to the task's output. - *
+ ** The key functionality of this feature is that the Mappers in the chain do not * need to be aware that they are executed in a chain. This enables having * reusable specialized Mappers that can be combined to perform composite * operations within a single task. - *
+ ** Special care has to be taken when creating chains that the key/values output * by a Mapper are valid for the following Mapper in the chain. It is assumed * all Mappers and the Reduce in the chain use maching output and input key and * value classes as no conversion is done by the chaining code. - *
+ *
* Using the ChainMapper and the ChainReducer classes is possible to compose
* Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]
. And
* immediate benefit of this pattern is a dramatic reduction in disk IO.
- *
* IMPORTANT: There is no need to specify the output key/value classes for the * ChainMapper, this is done by the addMapper for the last mapper in the chain. - *
+ ** ChainMapper usage pattern: - *
+ **
* ... * conf.setJobName("chain"); * conf.setInputFormat(TextInputFormat.class); * conf.setOutputFormat(TextOutputFormat.class); - * + * * JobConf mapAConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, * Text.class, Text.class, true, mapAConf); - * + * * JobConf mapBConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, mapBConf); - * + * * JobConf reduceConf = new JobConf(false); * ... * ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, * Text.class, Text.class, true, reduceConf); - * + * * ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, null); - * + * * ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, * LongWritable.class, LongWritable.class, true, null); - * + * * FileInputFormat.setInputPaths(conf, inDir); * FileOutputFormat.setOutputPath(conf, outDir); * ... - * + * * JobClient jc = new JobClient(conf); * RunningJob job = jc.submitJob(conf); * ... @@ -95,21 +95,21 @@ public class ChainMapper implements Mapper { /** * Adds a Mapper class to the chain job's JobConf. - * + ** It has to be specified how key and values are passed from one element of * the chain to the next, by value or by reference. If a Mapper leverages the * assumed semantics that the key and values are not modified by the collector * 'by value' must be used. If the Mapper does not expect this semantics, as * an optimization to avoid serialization and deserialization 'by reference' * can be used. - *
+ ** For the added Mapper the configuration given for it, *
+ *mapperConf
, have precedence over the job's JobConf. This * precedence is in effect when the task is running. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainMapper, this is done by the addMapper for the last mapper in the chain - *
+ ** * @param job job's JobConf to add the Mapper class. * @param klass the Mapper class to add. @@ -148,7 +148,7 @@ public class ChainMapper implements Mapper { /** * Configures the ChainMapper and all the Mappers in the chain. - *
+ ** If this method is overriden
+ *super.configure(...)
should be * invoked at the beginning of the overwriter method. */ @@ -171,7 +171,7 @@ public class ChainMapper implements Mapper { /** * Closes the ChainMapper and all the Mappers in the chain. - ** If this method is overriden
+ *super.close()
should be * invoked at the end of the overwriter method. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java index 641d82c0839..6f5b7cde2e5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/ChainReducer.java @@ -27,63 +27,63 @@ import java.util.Iterator; /** * The ChainReducer class allows to chain multiple Mapper classes after a * Reducer within the Reducer task. - ** For each record output by the Reducer, the Mapper classes are invoked in a * chained (or piped) fashion, the output of the first becomes the input of the * second, and so on until the last Mapper, the output of the last Mapper will * be written to the task's output. - *
+ ** The key functionality of this feature is that the Mappers in the chain do not * need to be aware that they are executed after the Reducer or in a chain. * This enables having reusable specialized Mappers that can be combined to * perform composite operations within a single task. - *
+ ** Special care has to be taken when creating chains that the key/values output * by a Mapper are valid for the following Mapper in the chain. It is assumed * all Mappers and the Reduce in the chain use maching output and input key and * value classes as no conversion is done by the chaining code. - *
+ ** Using the ChainMapper and the ChainReducer classes is possible to compose * Map/Reduce jobs that look like
+ *[MAP+ / REDUCE MAP*]
. And * immediate benefit of this pattern is a dramatic reduction in disk IO. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainReducer, this is done by the setReducer or the addMapper for the last * element in the chain. - *
+ ** ChainReducer usage pattern: - *
+ **
* ... * conf.setJobName("chain"); * conf.setInputFormat(TextInputFormat.class); * conf.setOutputFormat(TextOutputFormat.class); - * + * * JobConf mapAConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, * Text.class, Text.class, true, mapAConf); - * + * * JobConf mapBConf = new JobConf(false); * ... * ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, mapBConf); - * + * * JobConf reduceConf = new JobConf(false); * ... * ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, * Text.class, Text.class, true, reduceConf); - * + * * ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, null); - * + * * ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, * LongWritable.class, LongWritable.class, true, null); - * + * * FileInputFormat.setInputPaths(conf, inDir); * FileOutputFormat.setOutputPath(conf, outDir); * ... - * + * * JobClient jc = new JobClient(conf); * RunningJob job = jc.submitJob(conf); * ... @@ -95,18 +95,18 @@ public class ChainReducer implements Reducer { /** * Sets the Reducer class to the chain job's JobConf. - * + ** It has to be specified how key and values are passed from one element of * the chain to the next, by value or by reference. If a Reducer leverages the * assumed semantics that the key and values are not modified by the collector * 'by value' must be used. If the Reducer does not expect this semantics, as * an optimization to avoid serialization and deserialization 'by reference' * can be used. - *
+ ** For the added Reducer the configuration given for it, *
+ *reducerConf
, have precedence over the job's JobConf. This * precedence is in effect when the task is running. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainReducer, this is done by the setReducer or the addMapper for the last * element in the chain. @@ -139,18 +139,18 @@ public class ChainReducer implements Reducer { /** * Adds a Mapper class to the chain job's JobConf. - *
+ ** It has to be specified how key and values are passed from one element of * the chain to the next, by value or by reference. If a Mapper leverages the * assumed semantics that the key and values are not modified by the collector * 'by value' must be used. If the Mapper does not expect this semantics, as * an optimization to avoid serialization and deserialization 'by reference' * can be used. - *
+ ** For the added Mapper the configuration given for it, *
+ *mapperConf
, have precedence over the job's JobConf. This * precedence is in effect when the task is running. - ** IMPORTANT: There is no need to specify the output key/value classes for the * ChainMapper, this is done by the addMapper for the last mapper in the chain * . @@ -191,7 +191,7 @@ public class ChainReducer implements Reducer { /** * Configures the ChainReducer, the Reducer and all the Mappers in the chain. - *
+ ** If this method is overriden
+ *super.configure(...)
should be * invoked at the beginning of the overwriter method. */ @@ -215,7 +215,7 @@ public class ChainReducer implements Reducer { /** * Closes the ChainReducer, the Reducer and all the Mappers in the chain. - ** If this method is overriden
+ *super.close()
should be * invoked at the end of the overwriter method. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java index 39e80f9a16b..f0f3652beb1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/MultipleOutputs.java @@ -31,29 +31,29 @@ import java.util.*; * than the job default output via theOutputCollector
passed to * themap()
andreduce()
methods of the *Mapper
andReducer
implementations. - ** Each additional output, or named output, may be configured with its own *
+ *OutputFormat
, with its own key class and with its own value * class. - ** A named output can be a single file or a multi file. The later is refered as * a multi named output. - *
+ ** A multi named output is an unbound set of files all sharing the same *
+ *OutputFormat
, key class and value class configuration. - ** When named outputs are used within a
+ *Mapper
implementation, * key/values written to a name output are not part of the reduce phase, only * key/values written to the jobOutputCollector
are part of the * reduce phase. - ** MultipleOutputs supports counters, by default the are disabled. The counters * group is the {@link MultipleOutputs} class name. *
* The names of the counters are the same as the named outputs. For multi * named outputs the name of the counter is the concatenation of the named * output, and underscore '_' and the multiname. - * + ** Job configuration usage pattern is: *
* @@ -82,7 +82,7 @@ import java.util.*; * * ... *- * + ** Job configuration usage pattern is: *
* @@ -271,7 +271,6 @@ public class MultipleOutputs { /** * Adds a named output for the job. - * * * @param conf job conf to add the named output * @param namedOutput named output name, it has to be a word, letters @@ -291,7 +290,6 @@ public class MultipleOutputs { /** * Adds a multi named output for the job. - * * * @param conf job conf to add the named output * @param namedOutput named output name, it has to be a word, letters @@ -311,7 +309,6 @@ public class MultipleOutputs { /** * Adds a named output for the job. - * * * @param conf job conf to add the named output * @param namedOutput named output name, it has to be a word, letters @@ -339,9 +336,9 @@ public class MultipleOutputs { /** * Enables or disables counters for the named outputs. - * + ** By default these counters are disabled. - *
+ ** MultipleOutputs supports counters, by default the are disabled. * The counters group is the {@link MultipleOutputs} class name. *
@@ -358,9 +355,9 @@ public class MultipleOutputs { /** * Returns if the counters for the named outputs are enabled or not. - * + ** By default these counters are disabled. - *
+ ** MultipleOutputs supports counters, by default the are disabled. * The counters group is the {@link MultipleOutputs} class name. *
@@ -465,7 +462,6 @@ public class MultipleOutputs { /** * Gets the output collector for a named output. - * * * @param namedOutput the named output name * @param reporter the reporter @@ -480,7 +476,6 @@ public class MultipleOutputs { /** * Gets the output collector for a multi named output. - * * * @param namedOutput the named output name * @param multiName the multi name part @@ -525,7 +520,7 @@ public class MultipleOutputs { /** * Closes all the opened named outputs. - * + ** If overriden subclasses must invoke
super.close()
at the * end of theirclose()
* diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java index 8e884cee8bb..75179e110f9 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TokenCountMapper.java @@ -32,7 +32,7 @@ import org.apache.hadoop.mapred.Reporter; /** - * A {@link Mapper} that maps text values intopairs. Uses + * A {@link Mapper} that maps text values into <token,freq> pairs. Uses * {@link StringTokenizer} to break text into tokens. */ @InterfaceAudience.Public diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java index 8c20723b711..6251925ea2e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorJob.java @@ -60,7 +60,7 @@ import org.apache.hadoop.util.GenericOptionsParser; * The developer using Aggregate will need only to provide a plugin class * conforming to the following interface: * - * public interface ValueAggregatorDescriptor { public ArrayList + * public interface ValueAggregatorDescriptor { public ArrayList<Entry> * generateKeyValPairs(Object key, Object value); public void * configure(JobConfjob); } * diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java index a6b357324da..2738968d6b1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/aggregate/ValueAggregatorReducer.java @@ -45,7 +45,8 @@ public class ValueAggregatorReducer values, OutputCollector output, Reporter reporter) throws IOException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java index 2715705407e..159919f5a04 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/db/DBInputFormat.java @@ -195,8 +195,8 @@ public class DBInputFormat * @param inputClass the 
class object implementing DBWritable, which is the * Java object holding tuple fields. * @param tableName The table to read data from - * @param conditions The condition which to select data with, eg. '(updated > - * 20070101 AND length > 0)' + * @param conditions The condition which to select data with, eg. '(updated > + * 20070101 AND length > 0)' * @param orderBy the fieldNames in the orderBy clause. * @param fieldNames The field names in the table * @see #setInput(JobConf, Class, String, String) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java index 60ff715cb83..34353acb878 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Cluster.java @@ -134,6 +134,7 @@ public class Cluster { /** * Close the Cluster
. + * @throws IOException */ public synchronized void close() throws IOException { clientProtocolProvider.close(client); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java index c4c2778dd0b..b5e54b5eaf2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/ClusterMetrics.java @@ -40,15 +40,15 @@ import org.apache.hadoop.io.Writable; * Slot capacity of the cluster. *
Clients can query for the latest ClusterMetrics
, via
* {@link Cluster#getClusterStatus()}.
The framework first calls
* {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
- * {@link #map(Object, Object, Context)}
+ * {@link #map(Object, Object, org.apache.hadoop.mapreduce.Mapper.Context)}
* for each key/value pair in the InputSplit
. Finally
- * {@link #cleanup(Context)} is called.
All intermediate values associated with a given output key are * subsequently grouped by the framework, and passed to a {@link Reducer} to @@ -84,9 +84,10 @@ import org.apache.hadoop.mapreduce.task.MapContextImpl; * } * } * } - *
+ * * - *Applications may override the {@link #run(Context)} method to exert + *
Applications may override the
+ * {@link #run(org.apache.hadoop.mapreduce.Mapper.Context)} method to exert
* greater control on map processing e.g. multi-threaded Mapper
s
* etc.
The Reducer
copies the sorted output from each
* {@link Mapper} using HTTP across the network.
The framework merge sorts Reducer
inputs by
* key
s
@@ -55,7 +55,7 @@ import java.util.Iterator;
*
The shuffle and sort phases occur simultaneously i.e. while outputs are * being fetched they are merged.
* - *To achieve a secondary sort on the values returned by the value * iterator, the application should extend the key with the secondary @@ -83,10 +83,10 @@ import java.util.Iterator; *
In this phase the
- * {@link #reduce(Object, Iterable, Context)}
+ * {@link #reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)}
* method is called for each <key, (collection of values)>
in
* the sorted inputs.
The output of the reduce task is typically written to a @@ -113,7 +113,7 @@ import java.util.Iterator; * context.write(key, result); * } * } - *
+ * * * @see Mapper * @see Partitioner diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java index 06737c99396..51fe69a7212 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/DistributedCache.java @@ -115,7 +115,7 @@ import java.net.URI; * } * } * - * + * * * It is also very common to use the DistributedCache by using * {@link org.apache.hadoop.util.GenericOptionsParser}. @@ -235,7 +235,6 @@ public class DistributedCache { * DistributedCache and MapReduce code. * @param conf The configuration which stored the timestamps * @return a long array of timestamps - * @throws IOException * @deprecated Use {@link JobContext#getArchiveTimestamps()} instead */ @Deprecated @@ -250,7 +249,6 @@ public class DistributedCache { * DistributedCache and MapReduce code. * @param conf The configuration which stored the timestamps * @return a long array of timestamps - * @throws IOException * @deprecated Use {@link JobContext#getFileTimestamps()} instead */ @Deprecated @@ -434,7 +432,6 @@ public class DistributedCache { * internal DistributedCache and MapReduce code. * @param conf The configuration which stored the timestamps * @return a string array of booleans - * @throws IOException */ public static boolean[] getFileVisibilities(Configuration conf) { return parseBooleans(conf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES)); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java index d8833da5255..de25f649062 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueAggregatorJob.java @@ -60,7 +60,7 @@ import org.apache.hadoop.util.GenericOptionsParser; * The developer using Aggregate will need only to provide a plugin class * conforming to the following interface: * - * public interface ValueAggregatorDescriptor { public ArrayList* The configuration properties of the chain job have precedence over the * configuration properties of the Mapper. * @@ -738,7 +738,7 @@ public class Chain { /** * Sets the Reducer class to the chain job. * - *
+ ** The configuration properties of the chain job have precedence over the * configuration properties of the Reducer. * diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java index c042ff0769c..c3bf0120153 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainMapper.java @@ -57,24 +57,24 @@ import org.apache.hadoop.mapreduce.lib.chain.Chain.ChainBlockingQueue; * ChainMapper, this is done by the addMapper for the last mapper in the chain. *
* ChainMapper usage pattern: - * + ** *
* ... * Job = new Job(conf); - * + * * Configuration mapAConf = new Configuration(false); * ... * ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class, * Text.class, Text.class, true, mapAConf); - * + * * Configuration mapBConf = new Configuration(false); * ... * ChainMapper.addMapper(job, BMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, mapBConf); - * + * * ... - * + * * job.waitForComplettion(true); * ... *diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java index dc03d5d8cf6..1c375879052 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/chain/ChainReducer.java @@ -50,7 +50,7 @@ import java.io.IOException; * all Mappers and the Reduce in the chain use matching output and input key and * value classes as no conversion is done by the chaining code. * - * Using the ChainMapper and the ChainReducer classes is possible to + *
Using the ChainMapper and the ChainReducer classes is possible to
* compose Map/Reduce jobs that look like [MAP+ / REDUCE MAP*]
. And
* immediate benefit of this pattern is a dramatic reduction in disk IO.
@@ -59,26 +59,26 @@ import java.io.IOException; * element in the chain. *
* ChainReducer usage pattern: - * + ** *
* ... * Job = new Job(conf); * .... - * + * * Configuration reduceConf = new Configuration(false); * ... * ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class, * Text.class, Text.class, true, reduceConf); - * + * * ChainReducer.addMapper(job, CMap.class, Text.class, Text.class, * LongWritable.class, Text.class, false, null); - * + * * ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class, * LongWritable.class, LongWritable.class, true, null); - * + * * ... - * + * * job.waitForCompletion(true); * ... *diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java index a6953b7598a..78c3a0fd8e8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/db/DBInputFormat.java @@ -319,7 +319,7 @@ public class DBInputFormat
* It can be used instead of the default implementation, - * @link org.apache.hadoop.mapred.MapRunner, when the Map operation is not CPU + * {@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU * bound in order to improve throughput. *
* Mapper implementations using this MapRunnable must be thread-safe. *
* The Map-Reduce job has to be configured with the mapper to use via - * {@link #setMapperClass(Configuration, Class)} and + * {@link #setMapperClass(Job, Class)} and * the number of thread the thread-pool can use with the - * {@link #getNumberOfThreads(Configuration) method. The default + * {@link #getNumberOfThreads(JobContext)} method. The default * value is 10 threads. *
*/ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java index fa3708e7948..2c6954275f2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java @@ -181,7 +181,7 @@ public static final String OUTDIR = "mapreduce.output.fileoutputformat.outputdir * Get the {@link Path} to the task's temporary output directory * for the map-reduce job * - *
Some applications need to create/write-to side-files, which differ from * the actual job-outputs. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java index 24baa596b31..c31cab7ddd8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/MultipleOutputs.java @@ -81,7 +81,7 @@ import java.util.*; *
* Usage in Reducer: *
- ** Equivalently, {@link RandomWriter} also supports all the above options * and ones supported by {@link GenericOptionsParser} via the command-line. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/MultiFileWordCount.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/MultiFileWordCount.java index d3df4b303d0..b51946eb105 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/MultiFileWordCount.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/MultiFileWordCount.java @@ -199,7 +199,7 @@ public class MultiFileWordCount extends Configured implements Tool { } /** - * This Mapper is similar to the one in {@link WordCount.MapClass}. + * This Mapper is similar to the one in {@link WordCount.TokenizerMapper}. */ public static class MapClass extends MapperString generateFileName(K k, V v) { + * <K, V> String generateFileName(K k, V v) { * return k.toString() + "_" + v.toString(); * } * @@ -124,16 +124,16 @@ import java.util.*; * * * - * private MultipleOutputs* * Equivalently, {@link RandomTextWriter} also supports all the above options * and ones supported by {@link Tool} via the command-line. diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java index a326c8ca2be..67c9ca8f0f3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java @@ -47,7 +47,7 @@ import org.apache.hadoop.util.ToolRunner; * random binary sequence file of BytesWritable. * In order for this program to generate data for terasort with 10-byte keys * and 90-byte values, have the following config: - *out; + * private MultipleOutputs<Text, Text> out; * * public void setup(Context context) { - * out = new MultipleOutputs (context); + * out = new MultipleOutputs<Text, Text>(context); * ... * } * - * public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { + * public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { * for (Text t : values) { - * out.write(key, t, generateFileName(<parameter list...>)); + * out.write(key, t, generateFileName(<parameter list...>)); * } * } * @@ -294,7 +294,6 @@ public class MultipleOutputs { /** * Adds a named output for the job. 
- * * * @param job job to add the named output * @param namedOutput named output name, it has to be a word, letters diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/BinaryPartitioner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/BinaryPartitioner.java index 4a408409244..2a89908119f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/BinaryPartitioner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/BinaryPartitioner.java @@ -64,7 +64,7 @@ import org.apache.hadoop.mapreduce.Partitioner; * - {@link #setOffsets}
*- {@link #setLeftOffset}
*- {@link #setRightOffset}
- * + * */ @InterfaceAudience.Public @InterfaceStability.Evolving diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java index 247c2f2029b..b9014ef7861 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java @@ -374,7 +374,6 @@ public class JobContextImpl implements JobContext { * Get the timestamps of the archives. Used by internal * DistributedCache and MapReduce code. * @return a string array of timestamps - * @throws IOException */ public String[] getArchiveTimestamps() { return toTimestampStrs(DistributedCache.getArchiveTimestamps(conf)); @@ -384,7 +383,6 @@ public class JobContextImpl implements JobContext { * Get the timestamps of the files. Used by internal * DistributedCache and MapReduce code. * @return a string array of timestamps - * @throws IOException */ public String[] getFileTimestamps() { return toTimestampStrs(DistributedCache.getFileTimestamps(conf)); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java index 40e101ab8fd..6cb3211e1fa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java @@ -42,7 +42,7 @@ import org.apache.hadoop.util.ToolRunner; * random sequence of words. * In order for this program to generate data for terasort with a 5-10 words * per key and 20-100 words per value, have the following config: - *+ * + * }{@code * * *@@ -66,7 +66,7 @@ import org.apache.hadoop.util.ToolRunner; * mapreduce.randomtextwriter.totalbytes *1099511627776 * - *+ * - * + * }{@code * * *@@ -71,8 +71,7 @@ import org.apache.hadoop.util.ToolRunner; * mapreduce.randomwriter.totalbytes *1099511627776 * - *
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java
index 247c2f2029b..b9014ef7861 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/JobContextImpl.java
@@ -374,7 +374,6 @@ public class JobContextImpl implements JobContext {
  * Get the timestamps of the archives. Used by internal
  * DistributedCache and MapReduce code.
  * @return a string array of timestamps
- * @throws IOException
  */
 public String[] getArchiveTimestamps() {
   return toTimestampStrs(DistributedCache.getArchiveTimestamps(conf));
@@ -384,7 +383,6 @@ public class JobContextImpl implements JobContext {
  * Get the timestamps of the files. Used by internal
  * DistributedCache and MapReduce code.
  * @return a string array of timestamps
- * @throws IOException
  */
 public String[] getFileTimestamps() {
   return toTimestampStrs(DistributedCache.getFileTimestamps(conf));
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java
index 40e101ab8fd..6cb3211e1fa 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java
@@ -42,7 +42,7 @@ import org.apache.hadoop.util.ToolRunner;
 * random sequence of words.
 * In order for this program to generate data for terasort with a 5-10 words
 * per key and 20-100 words per value, have the following config:
- *
+ * {@code
 *
 *
 *
@@ -66,7 +66,7 @@ import org.apache.hadoop.util.ToolRunner;
 *   mapreduce.randomtextwriter.totalbytes
 *   1099511627776
 *
- *
+ * }
 *
 * Equivalently, {@link RandomTextWriter} also supports all the above options
 * and ones supported by {@link Tool} via the command-line.
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java
index a326c8ca2be..67c9ca8f0f3 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomWriter.java
@@ -47,7 +47,7 @@ import org.apache.hadoop.util.ToolRunner;
 * random binary sequence file of BytesWritable.
 * In order for this program to generate data for terasort with 10-byte keys
 * and 90-byte values, have the following config:
- *
+ * {@code
 *
 *
 *
@@ -71,8 +71,7 @@ import org.apache.hadoop.util.ToolRunner;
 *   mapreduce.randomwriter.totalbytes
 *   1099511627776
 *
- *
+ * }
 *
 */
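
Both hunks above show the property that bounds the amount of generated data. A minimal sketch of setting it programmatically, using the 1099511627776 (1 TB) figure from the Javadoc example:

import org.apache.hadoop.conf.Configuration;

public class RandomWriterConfig {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // 1099511627776 bytes = 1 TB, matching the example configuration in the Javadoc.
    conf.setLong("mapreduce.randomwriter.totalbytes", 1099511627776L);
    conf.setLong("mapreduce.randomtextwriter.totalbytes", 1099511627776L);
  }
}

Equivalently, because both tools accept the options supported by Tool/GenericOptionsParser, the same value can be passed on the command line with -D mapreduce.randomwriter.totalbytes=1099511627776.
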
diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java
index 593c1a462bf..7a80e8df6c8 100644
--- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java
+++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/emulators/resourceusage/ResourceUsageEmulatorPlugin.java
@@ -35,7 +35,7 @@ import org.apache.hadoop.conf.Configuration;
 * {@link ResourceUsageEmulatorPlugin} is also configured with a feedback module
 * i.e a {@link ResourceCalculatorPlugin}, to monitor the current resource
 * usage. {@link ResourceUsageMetrics} decides the final resource usage value to
- * emulate. {@link Progressive} keeps track of the task's progress.
+ * emulate. {@link Progressive} keeps track of the task's progress.
 *
diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/RestClientBindings.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/RestClientBindings.java
--- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/RestClientBindings.java
+++ b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/RestClientBindings.java
 * The swift rest client takes a Properties instance containing
 * the string values it uses to bind to a swift endpoint.
- *
+ *
 * This class extracts the values for a specific filesystem endpoint
 * and then builds an appropriate Properties file.
 */
@@ -188,7 +188,7 @@ public final class RestClientBindings {
 /**
  * Copy a (trimmed) property from the configuration file to the properties file.
- *
+ *
 * If marked as required and not found in the configuration, an
 * exception is raised.
 * If not required -and missing- then the property will not be set.
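
The required/optional behaviour described in that Javadoc can be pictured with a small helper along these lines. This is a hypothetical sketch, not the RestClientBindings method from the patch; the helper name, parameters and the use of IOException are assumptions made for illustration.

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;

public final class PropertyCopy {
  /**
   * Copy a trimmed configuration value into a Properties instance:
   * a required key that is missing raises an exception, an optional
   * missing key is simply skipped.
   */
  static void copy(Configuration conf, String confKey,
                   Properties props, String propsKey,
                   boolean required) throws IOException {
    String value = conf.getTrimmed(confKey);
    if (value == null) {
      if (required) {
        throw new IOException("Missing mandatory configuration option: " + confKey);
      }
      return; // optional and absent: leave the property unset
    }
    props.setProperty(propsKey, value);
  }
}
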
diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftRestClient.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftRestClient.java
index 28f8b47f4ac..55dad111242 100644
--- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftRestClient.java
+++ b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/http/SwiftRestClient.java
@@ -1061,10 +1061,9 @@ public final class SwiftRestClient {
 * Authenticate to Openstack Keystone
 * As well as returning the access token, the member fields {@link #token},
 * {@link #endpointURI} and {@link #objectLocationURI} are set up for re-use.
- *
+ *
 * This method is re-entrant -if more than one thread attempts to authenticate
 * neither will block -but the field values with have those of the last caller.
- *
 *
 * @return authenticated access token
 */
@@ -1575,6 +1574,7 @@ public final class SwiftRestClient {
 * @param path path to object
 * @param endpointURI damain url e.g. http://domain.com
 * @return valid URI for object
+ * @throws SwiftException
 */
 public static URI pathToURI(SwiftObjectPath path,
     URI endpointURI) throws SwiftException {
@@ -1820,7 +1820,7 @@ public final class SwiftRestClient {
 /**
  * Get the blocksize of this filesystem
- * @return a blocksize >0
+ * @return a blocksize > 0
  */
 public long getBlocksizeKB() {
   return blocksizeKB;
 }
diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystem.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystem.java
index b70f7efef58..27a572fd842 100644
--- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystem.java
+++ b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystem.java
@@ -225,10 +225,10 @@ public class SwiftNativeFileSystem extends FileSystem {
 * Return an array containing hostnames, offset and size of
 * portions of the given file. For a nonexistent
 * file or regions, null will be returned.
- *
+ *
 * This call is most helpful with DFS, where it returns
 * hostnames of machines that contain the given file.
- *
+ *
 * The FileSystem will simply return an elt containing 'localhost'.
 */
@Override
@@ -645,7 +645,7 @@ public class SwiftNativeFileSystem extends FileSystem {
 /**
  * Low level method to do a deep listing of all entries, not stopping
  * at the next directory entry. This is to let tests be confident that
- * recursive deletes &c really are working.
+ * recursive deletes really are working.
  * @param path path to recurse down
  * @param newest ask for the newest data, potentially slower than not.
  * @return a potentially empty array of file status
diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystemStore.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystemStore.java
index 0138eae412d..6d812a0e6ea 100644
--- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystemStore.java
+++ b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/snative/SwiftNativeFileSystemStore.java
@@ -518,7 +518,7 @@ public class SwiftNativeFileSystemStore {
 * Rename through copy-and-delete. this is a consequence of the
 * Swift filesystem using the path as the hash
 * into the Distributed Hash Table, "the ring" of filenames.
- *
+ *
 * Because of the nature of the operation, it is not atomic.
 *
 * @param src source file/dir
@@ -847,7 +847,7 @@
 }
 
 /**
- * Insert a throttled wait if the throttle delay >0
+ * Insert a throttled wait if the throttle delay > 0
  * @throws InterruptedIOException if interrupted during sleep
  */
 public void throttle() throws InterruptedIOException {
@@ -878,7 +878,7 @@
 * raised. This lets the caller distinguish a file not found with
 * other reasons for failure, so handles race conditions in recursive
 * directory deletes better.
- *
+ *
* The problem being addressed is: caller A requests a recursive directory
* of directory /dir ; caller B requests a delete of a file /dir/file,
* between caller A enumerating the files contents, and requesting a delete
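
The race described above (caller B removes /dir/file while caller A is still walking /dir) is why a missing entry during the walk should not abort the whole delete. A sketch of such tolerant handling, written against the generic FileSystem API rather than the Swift store's actual code; deleteChildren is a hypothetical helper:

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class TolerantDelete {
  /** Delete every entry under dir, ignoring files that vanish mid-walk. */
  static void deleteChildren(FileSystem fs, Path dir) throws IOException {
    FileStatus[] entries;
    try {
      entries = fs.listStatus(dir);
    } catch (FileNotFoundException e) {
      return; // the directory itself is already gone
    }
    for (FileStatus entry : entries) {
      try {
        fs.delete(entry.getPath(), true);
      } catch (FileNotFoundException e) {
        // another caller deleted it first: treat as success
      }
    }
  }
}
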
diff --git a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftTestUtils.java b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftTestUtils.java
index c9e26acf3d4..01ec739eda8 100644
--- a/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftTestUtils.java
+++ b/hadoop-tools/hadoop-openstack/src/main/java/org/apache/hadoop/fs/swift/util/SwiftTestUtils.java
@@ -236,7 +236,7 @@ public class SwiftTestUtils extends org.junit.Assert {
/**
* Convert a byte to a character for printing. If the
- * byte value is < 32 -and hence unprintable- the byte is
+ * byte value is < 32 -and hence unprintable- the byte is
* returned as a two digit hex value
* @param b byte
* @return the printable character string
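
The behaviour documented in that hunk, printable bytes passing through and values below 32 rendered as two hex digits, can be sketched as a standalone method; toChar is only an illustrative name, not the test utility itself:

public final class PrintableByte {
  /** Render a byte for logging: printable characters as-is, control bytes as hex. */
  static String toChar(byte b) {
    int value = b & 0xff;
    if (value >= 0x20) {
      return Character.toString((char) value);
    }
    return String.format("%02x", value);
  }
}
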
diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/InputDemuxer.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/InputDemuxer.java
index cd99e1ce989..0927a771d3b 100644
--- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/InputDemuxer.java
+++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/InputDemuxer.java
@@ -45,12 +45,12 @@ public interface InputDemuxer extends Closeable {
public void bindTo(Path path, Configuration conf) throws IOException;
/**
- * Get the next