<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> <!-- Generated by the JDiff Javadoc doclet --> <!-- (http://www.jdiff.org) --> <!-- on Sun Dec 04 01:04:32 UTC 2011 --> <api xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:noNamespaceSchemaLocation='api.xsd' name="hadoop-mapred 0.22.0" jdversion="1.0.9"> <!-- Command line arguments = -doclet org.apache.hadoop.classification.tools.ExcludePrivateAnnotationsJDiffDoclet -docletpath /x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/build/ivy/lib/Hadoop/common/hadoop-common-0.22.0-SNAPSHOT.jar:/x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/build/ivy/lib/Hadoop/jdiff/jdiff-1.0.9.jar:/x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/build/ivy/lib/Hadoop/jdiff/xerces-1.4.4.jar -classpath /x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/build/classes:/x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/conf:/home/jenkins/.ivy2/cache/org.apache.hadoop/hadoop-common/jars/hadoop-common-0.22.0-SNAPSHOT.jar:/home/jenkins/.ivy2/cache/commons-cli/commons-cli/jars/commons-cli-1.2.jar:/home/jenkins/.ivy2/cache/xmlenc/xmlenc/jars/xmlenc-0.52.jar:/home/jenkins/.ivy2/cache/commons-codec/commons-codec/jars/commons-codec-1.4.jar:/home/jenkins/.ivy2/cache/commons-logging/commons-logging/jars/commons-logging-1.1.1.jar:/home/jenkins/.ivy2/cache/org.slf4j/slf4j-api/jars/slf4j-api-1.6.1.jar:/home/jenkins/.ivy2/cache/org.slf4j/slf4j-log4j12/jars/slf4j-log4j12-1.6.1.jar:/home/jenkins/.ivy2/cache/log4j/log4j/bundles/log4j-1.2.16.jar:/home/jenkins/.ivy2/cache/org.mortbay.jetty/jetty/jars/jetty-6.1.26.jar:/home/jenkins/.ivy2/cache/org.mortbay.jetty/jetty-util/jars/jetty-util-6.1.26.jar:/home/jenkins/.ivy2/cache/org.mortbay.jetty/servlet-api/jars/servlet-api-2.5-20081211.jar:/home/jenkins/.ivy2/cache/tomcat/jasper-runtime/jars/jasper-runtime-5.5.12.jar:/home/jenkins/.ivy2/cache/tomcat/jasper-compiler/jars/jasper-compiler-5.5.12.jar:/hom
e/jenkins/.ivy2/cache/org.mortbay.jetty/jsp-2.1-jetty/jars/jsp-2.1-jetty-6.1.26.jar:/home/jenkins/.ivy2/cache/org.eclipse.jdt/core/jars/core-3.1.1.jar:/home/jenkins/.ivy2/cache/org.mortbay.jetty/jsp-api-2.1-glassfish/jars/jsp-api-2.1-glassfish-2.1.v20091210.jar:/home/jenkins/.ivy2/cache/org.mortbay.jetty/jsp-2.1-glassfish/jars/jsp-2.1-glassfish-2.1.v20091210.jar:/home/jenkins/.ivy2/cache/org.eclipse.jdt.core.compiler/ecj/jars/ecj-3.5.1.jar:/home/jenkins/.ivy2/cache/commons-el/commons-el/jars/commons-el-1.0.jar:/home/jenkins/.ivy2/cache/net.java.dev.jets3t/jets3t/jars/jets3t-0.7.1.jar:/home/jenkins/.ivy2/cache/commons-httpclient/commons-httpclient/jars/commons-httpclient-3.1.jar:/home/jenkins/.ivy2/cache/commons-net/commons-net/jars/commons-net-1.4.1.jar:/home/jenkins/.ivy2/cache/oro/oro/jars/oro-2.0.8.jar:/home/jenkins/.ivy2/cache/net.sf.kosmosfs/kfs/jars/kfs-0.3.jar:/home/jenkins/.ivy2/cache/junit/junit/jars/junit-4.8.1.jar:/home/jenkins/.ivy2/cache/hsqldb/hsqldb/jars/hsqldb-1.8.0.10.jar:/home/jenkins/.ivy2/cache/org.apache.avro/avro/jars/avro-1.5.3.jar:/home/jenkins/.ivy2/cache/org.codehaus.jackson/jackson-mapper-asl/jars/jackson-mapper-asl-1.7.3.jar:/home/jenkins/.ivy2/cache/org.codehaus.jackson/jackson-core-asl/jars/jackson-core-asl-1.7.3.jar:/home/jenkins/.ivy2/cache/com.thoughtworks.paranamer/paranamer/jars/paranamer-2.3.jar:/home/jenkins/.ivy2/cache/org.xerial.snappy/snappy-java/bundles/snappy-java-1.0.3.2.jar:/home/jenkins/.ivy2/cache/org.apache.avro/avro-ipc/jars/avro-ipc-1.5.3.jar:/home/jenkins/.ivy2/cache/org.apache.hadoop/hadoop-common-test/jars/hadoop-common-test-0.22.0-SNAPSHOT.jar:/home/jenkins/.ivy2/cache/org.apache.hadoop/hadoop-hdfs/jars/hadoop-hdfs-0.22.0-SNAPSHOT.jar:/home/jenkins/.ivy2/cache/com.google.guava/guava/jars/guava-r09.jar:/home/jenkins/.ivy2/cache/org.apache.avro/avro-compiler/jars/avro-compiler-1.5.3.jar:/home/jenkins/.ivy2/cache/commons-lang/commons-lang/jars/commons-lang-2.5.jar:/home/jenkins/.ivy2/cache/org.apache.velocity/velocit
y/jars/velocity-1.6.4.jar:/home/jenkins/.ivy2/cache/commons-collections/commons-collections/jars/commons-collections-3.2.1.jar:/home/jenkins/.ivy2/cache/org.aspectj/aspectjrt/jars/aspectjrt-1.6.5.jar:/home/jenkins/.ivy2/cache/org.aspectj/aspectjtools/jars/aspectjtools-1.6.5.jar:/home/jenkins/.ivy2/cache/jdiff/jdiff/jars/jdiff-1.0.9.jar:/home/jenkins/.ivy2/cache/xerces/xerces/jars/xerces-1.4.4.jar:/home/jenkins/tools/ant/latest/lib/ant-launcher.jar:/usr/share/java/xmlParserAPIs.jar:/usr/share/java/xercesImpl.jar:/home/jenkins/tools/ant/latest/lib/ant-apache-resolver.jar:/home/jenkins/tools/ant/latest/lib/ant-apache-bcel.jar:/home/jenkins/tools/ant/latest/lib/ant-jsch.jar:/home/jenkins/tools/ant/latest/lib/ant-jmf.jar:/home/jenkins/tools/ant/latest/lib/ant-apache-oro.jar:/home/jenkins/tools/ant/latest/lib/ant-netrexx.jar:/home/jenkins/tools/ant/latest/lib/ant-testutil.jar:/home/jenkins/tools/ant/latest/lib/ant-apache-xalan2.jar:/home/jenkins/tools/ant/latest/lib/ant-javamail.jar:/home/jenkins/tools/ant/latest/lib/ant.jar:/home/jenkins/tools/ant/latest/lib/ant-junit.jar:/home/jenkins/tools/ant/latest/lib/ant-swing.jar:/home/jenkins/tools/ant/latest/lib/ant-commons-net.jar:/home/jenkins/tools/ant/latest/lib/ant-jdepend.jar:/home/jenkins/tools/ant/latest/lib/ant-junit4.jar:/home/jenkins/tools/ant/latest/lib/ant-commons-logging.jar:/home/jenkins/tools/ant/latest/lib/ant-apache-bsf.jar:/home/jenkins/tools/ant/latest/lib/ant-apache-log4j.jar:/home/jenkins/tools/ant/latest/lib/ant-jai.jar:/home/jenkins/tools/ant/latest/lib/ant-apache-regexp.jar:/home/jenkins/tools/ant/latest/lib/ant-antlr.jar:/tmp/jdk1.6.0_29/lib/tools.jar -sourcepath /x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/src/java:/x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/src/tools -apidir /x1/jenkins/jenkins-slave/workspace/Hadoop-22-Build/common/mapreduce/lib/jdiff -apiname hadoop-mapred 0.22.0 --> <package name="org.apache.hadoop.filecache"> <!-- start class 
org.apache.hadoop.filecache.DistributedCache --> <class name="DistributedCache" extends="org.apache.hadoop.mapreduce.filecache.DistributedCache" abstract="false" static="false" final="false" visibility="public" deprecated="Use methods on {@link Job}."> <constructor name="DistributedCache" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[Distribute application-specific large, read-only files efficiently. <p><code>DistributedCache</code> is a facility provided by the Map-Reduce framework to cache files (text, archives, jars etc.) needed by applications. </p> <p>Applications specify the files, via urls (hdfs:// or http://) to be cached via the {@link org.apache.hadoop.mapred.JobConf}. The <code>DistributedCache</code> assumes that the files specified via urls are already present on the {@link FileSystem} at the path specified by the url and are accessible by every machine in the cluster.</p> <p>The framework will copy the necessary files on to the slave node before any tasks for the job are executed on that node. Its efficiency stems from the fact that the files are only copied once per job and the ability to cache archives which are un-archived on the slaves.</p> <p><code>DistributedCache</code> can be used to distribute simple, read-only data/text files and/or more complex types such as archives, jars etc. Archives (zip, tar and tgz/tar.gz files) are un-archived at the slave nodes. Jars may be optionally added to the classpath of the tasks, a rudimentary software distribution mechanism. Files have execution permissions. Optionally users can also direct it to symlink the distributed cache file(s) into the working directory of the task.</p> <p><code>DistributedCache</code> tracks modification timestamps of the cache files. 
Clearly the cache files should not be modified by the application or externally while the job is executing.</p> <p>Here is an illustrative example on how to use the <code>DistributedCache</code>:</p> <p><blockquote><pre> // Setting up the cache for the application 1. Copy the requisite files to the <code>FileSystem</code>: $ bin/hadoop fs -copyFromLocal lookup.dat /myapp/lookup.dat $ bin/hadoop fs -copyFromLocal map.zip /myapp/map.zip $ bin/hadoop fs -copyFromLocal mylib.jar /myapp/mylib.jar $ bin/hadoop fs -copyFromLocal mytar.tar /myapp/mytar.tar $ bin/hadoop fs -copyFromLocal mytgz.tgz /myapp/mytgz.tgz $ bin/hadoop fs -copyFromLocal mytargz.tar.gz /myapp/mytargz.tar.gz 2. Setup the application's <code>JobConf</code>: JobConf job = new JobConf(); DistributedCache.addCacheFile(new URI("/myapp/lookup.dat#lookup.dat"), job); DistributedCache.addCacheArchive(new URI("/myapp/map.zip"), job); DistributedCache.addFileToClassPath(new Path("/myapp/mylib.jar"), job); DistributedCache.addCacheArchive(new URI("/myapp/mytar.tar"), job); DistributedCache.addCacheArchive(new URI("/myapp/mytgz.tgz"), job); DistributedCache.addCacheArchive(new URI("/myapp/mytargz.tar.gz"), job); 3. Use the cached files in the {@link org.apache.hadoop.mapred.Mapper} or {@link org.apache.hadoop.mapred.Reducer}: public static class MapClass extends MapReduceBase implements Mapper<K, V, K, V> { private Path[] localArchives; private Path[] localFiles; public void configure(JobConf job) { // Get the cached archives/files localArchives = DistributedCache.getLocalCacheArchives(job); localFiles = DistributedCache.getLocalCacheFiles(job); } public void map(K key, V value, OutputCollector<K, V> output, Reporter reporter) throws IOException { // Use data from the cached archives/files here // ... // ... output.collect(k, v); } } </pre></blockquote></p> It is also very common to use the DistributedCache by using {@link org.apache.hadoop.util.GenericOptionsParser}.
This class includes methods that should be used by users (specifically those mentioned in the example above, as well as {@link DistributedCache#addArchiveToClassPath(Path, Configuration)}), as well as methods intended for use by the MapReduce framework (e.g., {@link org.apache.hadoop.mapred.JobClient}). For implementation details, see {@link TrackerDistributedCacheManager} and {@link TaskDistributedCacheManager}. @see org.apache.hadoop.mapred.JobConf @see org.apache.hadoop.mapred.JobClient @see org.apache.hadoop.mapreduce.Job @deprecated Use methods on {@link Job}.]]> </doc> </class> <!-- end class org.apache.hadoop.filecache.DistributedCache --> </package> <package name="org.apache.hadoop.mapred"> <!-- start class org.apache.hadoop.mapred.ClusterStatus --> <class name="ClusterStatus" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link ClusterMetrics} or {@link TaskTrackerInfo} instead"> <implements name="org.apache.hadoop.io.Writable"/> <method name="getTaskTrackers" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of task trackers in the cluster. @return the number of task trackers in the cluster.]]> </doc> </method> <method name="getActiveTrackerNames" return="java.util.Collection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the names of active task trackers in the cluster. @return the active task trackers in the cluster.]]> </doc> </method> <method name="getBlacklistedTrackerNames" return="java.util.Collection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the names of blacklisted task trackers in the cluster.
@return the blacklisted task trackers in the cluster.]]> </doc> </method> <method name="getBlacklistedTrackers" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of blacklisted task trackers in the cluster. @return the number of blacklisted task trackers in the cluster.]]> </doc> </method> <method name="getNumExcludedNodes" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of excluded hosts in the cluster. @return the number of excluded hosts in the cluster.]]> </doc> </method> <method name="getTTExpiryInterval" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the tasktracker expiry interval for the cluster @return the expiry interval in msec]]> </doc> </method> <method name="getMapTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of currently running map tasks in the cluster. @return the number of currently running map tasks in the cluster.]]> </doc> </method> <method name="getReduceTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of currently running reduce tasks in the cluster. @return the number of currently running reduce tasks in the cluster.]]> </doc> </method> <method name="getMaxMapTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the maximum capacity for running map tasks in the cluster. 
@return the maximum capacity for running map tasks in the cluster.]]> </doc> </method> <method name="getMaxReduceTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the maximum capacity for running reduce tasks in the cluster. @return the maximum capacity for running reduce tasks in the cluster.]]> </doc> </method> <method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Use {@link #getJobTrackerStatus()} instead."> <doc> <![CDATA[Get the current state of the <code>JobTracker</code>, as {@link JobTracker.State} @return the current state of the <code>JobTracker</code>. @deprecated Use {@link #getJobTrackerStatus()} instead.]]> </doc> </method> <method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the JobTracker's status. @return {@link JobTrackerStatus} of the JobTracker]]> </doc> </method> <method name="getBlackListedTrackersInfo" return="java.util.Collection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets the list of blacklisted trackers along with reasons for blacklisting. 
@return the collection of {@link BlackListInfo} objects.]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Status information on the current state of the Map-Reduce cluster. <p><code>ClusterStatus</code> provides clients with information such as: <ol> <li> Size of the cluster. </li> <li> Name of the trackers. </li> <li> Task capacity of the cluster. </li> <li> The number of currently running map & reduce tasks. </li> <li> State of the <code>JobTracker</code>. </li> <li> Details regarding black listed trackers. </li> </ol></p> <p>Clients can query for the latest <code>ClusterStatus</code>, via {@link JobClient#getClusterStatus()}.</p> @see JobClient @deprecated Use {@link ClusterMetrics} or {@link TaskTrackerInfo} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.ClusterStatus --> <!-- start class org.apache.hadoop.mapred.ClusterStatus.BlackListInfo --> <class name="ClusterStatus.BlackListInfo" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <method name="getTrackerName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets the blacklisted tasktracker's name. 
@return tracker's name.]]> </doc> </method> <method name="getReasonForBlackListing" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets the reason for which the tasktracker was blacklisted. @return reason for which the tracker was blacklisted]]> </doc> </method> <method name="getBlackListReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets a descriptive report about why the tasktracker was blacklisted. @return report describing why the tasktracker was blacklisted.]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Class which encapsulates information about a blacklisted tasktracker. The information includes the tasktracker's name and reasons for getting blacklisted.
The toString method of the class will print the information in a whitespace separated fashion to enable parsing.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.ClusterStatus.BlackListInfo --> <!-- start class org.apache.hadoop.mapred.Counters --> <class name="Counters" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Counters} instead."> <implements name="org.apache.hadoop.io.Writable"/> <implements name="java.lang.Iterable"/> <constructor name="Counters" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getGroupNames" return="java.util.Collection" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the names of all counter classes. @return Set of counter names.]]> </doc> </method> <method name="iterator" return="java.util.Iterator" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="groupName" type="java.lang.String"/> <doc> <![CDATA[Returns the named counter group, or an empty group if there is none with the specified name.]]> </doc> </method> <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Enum"/> <doc> <![CDATA[Find the counter for the given enum. The same enum will always return the same counter. 
@param key the counter key @return the matching counter object]]> </doc> </method> <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="group" type="java.lang.String"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Find a counter given the group and the name. @param group the name of the group @param name the internal name of the counter @return the counter for that name]]> </doc> </method> <method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="deprecated, no comment"> <param name="group" type="java.lang.String"/> <param name="id" type="int"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Find a counter by using strings @param group the name of the group @param id the id of the counter within the group (0 to N-1) @param name the internal name of the counter @return the counter for that name @deprecated]]> </doc> </method> <method name="incrCounter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Enum"/> <param name="amount" type="long"/> <doc> <![CDATA[Increments the specified counter by the specified amount, creating it if it didn't already exist. 
@param key identifies a counter @param amount amount by which counter is to be incremented]]> </doc> </method> <method name="incrCounter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="group" type="java.lang.String"/> <param name="counter" type="java.lang.String"/> <param name="amount" type="long"/> <doc> <![CDATA[Increments the specified counter by the specified amount, creating it if it didn't already exist. @param group the name of the group @param counter the internal name of the counter @param amount amount by which counter is to be incremented]]> </doc> </method> <method name="getCounter" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Enum"/> <doc> <![CDATA[Returns current value of the specified counter, or 0 if the counter does not exist.]]> </doc> </method> <method name="incrAllCounters" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="org.apache.hadoop.mapred.Counters"/> <doc> <![CDATA[Increments multiple counters by their amounts in another Counters instance. 
@param other the other Counters instance]]> </doc> </method> <method name="sum" return="org.apache.hadoop.mapred.Counters" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="a" type="org.apache.hadoop.mapred.Counters"/> <param name="b" type="org.apache.hadoop.mapred.Counters"/> <doc> <![CDATA[Convenience method for computing the sum of two sets of counters.]]> </doc> </method> <method name="size" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the total number of counters, by summing the number of counters in each group.]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Write the set of groups. The external format is: #groups (groupName group)* i.e. the number of groups followed by 0 or more groups, where each group is of the form: groupDisplayName #counters (false | true counter)* where each counter is of the form: name (false | true displayName) value]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read a set of groups.]]> </doc> </method> <method name="log" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="log" type="org.apache.commons.logging.Log"/> <doc> <![CDATA[Logs the current counter values. 
@param log The log to use.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return textual representation of the counter values.]]> </doc> </method> <method name="makeCompactString" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Convert a counters object into a single line that is easy to parse. @return the string with "name=value" for each counter and separated by ","]]> </doc> </method> <method name="makeEscapedCompactString" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Represent the counter in a textual format that can be converted back to its object form @return the string in the following format {(groupname)(group-displayname)[(countername)(displayname)(value)][][]}{}{}]]> </doc> </method> <method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="compactString" type="java.lang.String"/> <exception name="ParseException" type="java.text.ParseException"/> <doc> <![CDATA[Convert a stringified counter representation into a counter object. Note that the counter can be recovered if it is stringified using {@link #makeEscapedCompactString()}.
@return a Counters object]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="obj" type="java.lang.Object"/> </method> <doc> <![CDATA[A set of named counters. <p><code>Counters</code> represent global counters, defined either by the Map-Reduce framework or applications. Each <code>Counter</code> can be of any {@link Enum} type.</p> <p><code>Counters</code> are bunched into {@link Group}s, each comprising counters from a particular <code>Enum</code> class.</p> @deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.Counters --> <!-- start class org.apache.hadoop.mapred.Counters.Counter --> <class name="Counters.Counter" extends="org.apache.hadoop.mapreduce.Counter" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <method name="setDisplayName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="newName" type="java.lang.String"/> </method> <method name="makeEscapedCompactString" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the compact stringified version of the counter in the format [(actual-name)(display-name)(value)]]]> </doc> </method> <method name="getCounter" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[What is the current value of this counter?
@return the current value]]> </doc> </method> <doc> <![CDATA[A counter record, comprising its name and value.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.Counters.Counter --> <!-- start class org.apache.hadoop.mapred.Counters.Group --> <class name="Counters.Group" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <implements name="java.lang.Iterable"/> <method name="getName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns raw name of the group. This is the name of the enum class for this group of counters.]]> </doc> </method> <method name="getDisplayName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns localized name of the group. 
This is the same as getName() by default, but different if an appropriate ResourceBundle is found.]]> </doc> </method> <method name="setDisplayName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="displayName" type="java.lang.String"/> <doc> <![CDATA[Set the display name]]> </doc> </method> <method name="makeEscapedCompactString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the compact stringified version of the group in the format {(actual-name)(display-name)(value)[][][]} where [] are compact strings for the counters within.]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="obj" type="java.lang.Object"/> <doc> <![CDATA[Checks for (content) equality of Groups]]> </doc> </method> <method name="getCounter" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counterName" type="java.lang.String"/> <doc> <![CDATA[Returns the value of the specified counter, or 0 if the counter does not exist.]]> </doc> </method> <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="use {@link #getCounter(String)} instead"> <param name="id" type="int"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Get the counter for the given id and create it if it doesn't exist. 
@param id the numeric id of the counter within the group @param name the internal counter name @return the counter @deprecated use {@link #getCounter(String)} instead]]> </doc> </method> <method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Get the counter for the given name and create it if it doesn't exist. @param name the internal counter name @return the counter]]> </doc> </method> <method name="size" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the number of counters in this group.]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="iterator" return="java.util.Iterator" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[<code>Group</code> of counters, comprising counters from a particular counter {@link Enum} class. 
<p><code>Group</code> handles localization of the class name and the counter names.</p>]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.Counters.Group --> <!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException --> <class name="FileAlreadyExistsException" extends="java.io.IOException" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FileAlreadyExistsException" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="FileAlreadyExistsException" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[Used when target file already exists for any operation and is not configured to be overwritten.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException --> <!-- start class org.apache.hadoop.mapred.FileInputFormat --> <class name="FileInputFormat" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} instead."> <implements name="org.apache.hadoop.mapred.InputFormat"/> <constructor name="FileInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setMinSplitSize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="minSplitSize" type="long"/> </method> <method name="isSplitable" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="filename" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Is the given filename splitable? Usually, true, but if the file is stream compressed, it will not be. 
<code>FileInputFormat</code> implementations can override this and return <code>false</code> to ensure that individual input files are never split-up so that {@link Mapper}s process entire files. @param fs the file system that the file is on @param filename the file name to check @return is this file splitable?]]> </doc> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setInputPathFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="filter" type="java.lang.Class"/> <doc> <![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job. @param filter the PathFilter class use for filtering the input paths.]]> </doc> </method> <method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get a PathFilter instance of the filter set for the input paths. 
@return the PathFilter instance set for the job, NULL if none has been set.]]> </doc> </method> <method name="addInputPathRecursively" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="result" type="java.util.List"/> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="inputFilter" type="org.apache.hadoop.fs.PathFilter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add files in the input path recursively into the results. @param result The List to store all files. @param fs The FileSystem. @param path The input path. @param inputFilter The input filter that can be used to filter files/dirs. @throws IOException]]> </doc> </method> <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[List input directories. Subclasses may override to, e.g., select only files matching a regular expression. @param job the job to list input paths for @return array of FileStatus objects @throws IOException if zero items.]]> </doc> </method> <method name="makeSplit" return="org.apache.hadoop.mapred.FileSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="file" type="org.apache.hadoop.fs.Path"/> <param name="start" type="long"/> <param name="length" type="long"/> <param name="hosts" type="java.lang.String[]"/> <doc> <![CDATA[A factory that makes the split for this class. 
It can be overridden by sub-classes to make sub-types]]> </doc> </method> <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="numSplits" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Splits files returned by {@link #listStatus(JobConf)} when they're too big.]]> </doc> </method> <method name="computeSplitSize" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="goalSize" type="long"/> <param name="minSize" type="long"/> <param name="blockSize" type="long"/> </method> <method name="getBlockIndex" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> <param name="offset" type="long"/> </method> <method name="setInputPaths" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="commaSeparatedPaths" type="java.lang.String"/> <doc> <![CDATA[Sets the given comma separated paths as the list of inputs for the map-reduce job. 
@param conf Configuration of the job @param commaSeparatedPaths Comma separated paths to be set as the list of inputs for the map-reduce job.]]> </doc> </method> <method name="addInputPaths" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="commaSeparatedPaths" type="java.lang.String"/> <doc> <![CDATA[Add the given comma separated paths to the list of inputs for the map-reduce job. @param conf The configuration of the job @param commaSeparatedPaths Comma separated paths to be added to the list of inputs for the map-reduce job.]]> </doc> </method> <method name="setInputPaths" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/> <doc> <![CDATA[Set the array of {@link Path}s as the list of inputs for the map-reduce job. @param conf Configuration of the job. @param inputPaths the {@link Path}s of the input directories/files for the map-reduce job.]]> </doc> </method> <method name="addInputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job. 
@param conf The configuration of the job @param path {@link Path} to be added to the list of inputs for the map-reduce job.]]> </doc> </method> <method name="getInputPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the list of input {@link Path}s for the map-reduce job. @param conf The configuration of the job @return the list of input {@link Path}s for the map-reduce job.]]> </doc> </method> <method name="getSplitHosts" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> <param name="offset" type="long"/> <param name="splitSize" type="long"/> <param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This function identifies and returns the hosts that contribute most for a given split. For calculating the contribution, rack locality is treated on par with host locality, so hosts from racks that contribute the most are preferred over hosts on racks that contribute less @param blkLocations The list of block locations @param offset @param splitSize @return array of hosts that contribute most to this split @throws IOException]]> </doc> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="NUM_INPUT_FILES" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[A base class for file-based {@link InputFormat}. 
<p><code>FileInputFormat</code> is the base class for all file-based <code>InputFormat</code>s. This provides a generic implementation of {@link #getSplits(JobConf, int)}. Subclasses of <code>FileInputFormat</code> can also override the {@link #isSplitable(FileSystem, Path)} method to ensure input-files are not split-up and are processed as a whole by {@link Mapper}s. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.FileInputFormat --> <!-- start class org.apache.hadoop.mapred.FileOutputCommitter --> <class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FileOutputCommitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setupJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapred.JobContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="commitJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapred.JobContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="cleanupJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapred.JobContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="abortJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" 
type="org.apache.hadoop.mapred.JobContext"/> <param name="runState" type="int"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setupTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="commitTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="abortTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="needsTaskCommit" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="TEMP_DIR_NAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Temporary directory name]]> </doc> </field> <field name="SUCCEEDED_FILE_NAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[An {@link OutputCommitter} that 
commits files specified in job output directory i.e. ${mapreduce.output.fileoutputformat.outputdir}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.FileOutputCommitter --> <!-- start class org.apache.hadoop.mapred.FileOutputFormat --> <class name="FileOutputFormat" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.OutputFormat"/> <constructor name="FileOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setCompressOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="compress" type="boolean"/> <doc> <![CDATA[Set whether the output of the job is compressed. @param conf the {@link JobConf} to modify @param compress should the output of the job be compressed?]]> </doc> </method> <method name="getCompressOutput" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Is the job output compressed? @param conf the {@link JobConf} to look in @return <code>true</code> if the job output should be compressed, <code>false</code> otherwise]]> </doc> </method> <method name="setOutputCompressorClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="codecClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs. 
@param conf the {@link JobConf} to modify @param codecClass the {@link CompressionCodec} to be used to compress the job outputs]]> </doc> </method> <method name="getOutputCompressorClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="defaultValue" type="java.lang.Class"/> <doc> <![CDATA[Get the {@link CompressionCodec} for compressing the job outputs. @param conf the {@link JobConf} to look in @param defaultValue the {@link CompressionCodec} to return if not set @return the {@link CompressionCodec} to be used to compress the job outputs @throws IllegalArgumentException if the class was specified, but not found]]> </doc> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/> <exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setOutputPath" abstract="false" native="false" synchronized="false" static="true" 
final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="outputDir" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Set the {@link Path} of the output directory for the map-reduce job. @param conf The configuration of the job. @param outputDir the {@link Path} of the output directory for the map-reduce job.]]> </doc> </method> <method name="getOutputPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the {@link Path} to the output directory for the map-reduce job. @return the {@link Path} to the output directory for the map-reduce job. @see FileOutputFormat#getWorkOutputPath(JobConf)]]> </doc> </method> <method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the {@link Path} to the task's temporary output directory for the map-reduce job <h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4> <p><i>Note:</i> The following is valid only if the {@link OutputCommitter} is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not a <code>FileOutputCommitter</code>, the task's temporary output directory is same as {@link #getOutputPath(JobConf)} i.e. <tt>${mapreduce.output.fileoutputformat.outputdir}</tt></p> <p>Some applications need to create/write-to side-files, which differ from the actual job-outputs.</p> <p>In such cases there could be issues with 2 instances of the same TIP (running simultaneously e.g. speculative tasks) trying to open/write-to the same file (path) on HDFS. Hence the application-writer will have to pick unique names per task-attempt (e.g. 
using the attemptid, say <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p> <p>To get around this the Map-Reduce framework helps the application-writer out by maintaining a special <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> sub-directory for each task-attempt on HDFS where the output of the task-attempt goes. On successful completion of the task-attempt the files in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only) are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the framework discards the sub-directory of unsuccessful task-attempts. This is completely transparent to the application.</p> <p>The application-writer can take advantage of this by creating any side-files required in <tt>${mapreduce.task.output.dir}</tt> during execution of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the framework will move them out similarly - thus she doesn't have to pick unique paths per task-attempt.</p> <p><i>Note</i>: the value of <tt>${mapreduce.task.output.dir}</tt> during execution of a particular task-attempt is actually <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt>, and this value is set by the map-reduce framework. So, just create any side-files in the path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce task to take advantage of this feature.</p> <p>The entire discussion holds true for maps of jobs with reducer=NONE (i.e. 
0 reduces) since output of the map, in that case, goes directly to HDFS.</p> @return the {@link Path} to the task's temporary output directory for the map-reduce job.]]> </doc> </method> <method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Helper function to create the task's temporary output directory and return the path to the task's output file. @param conf job-configuration @param name temporary task-output filename @return path to the task's temporary output file @throws IOException]]> </doc> </method> <method name="getUniqueName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Helper function to generate a name that is unique for the task. <p>The generated name can be used to create custom files from within the different tasks for the job, the names for different tasks will not collide with each other.</p> <p>The given name is postfixed with the task type, 'm' for maps, 'r' for reduces and the task partition number. For example, given a name 'test' running on the first map of the job the generated name will be 'test-m-00000'.</p> @param conf the configuration for the job. @param name the name to make unique. 
@return a unique name across all tasks of the job.]]> </doc> </method> <method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Helper function to generate a {@link Path} for a file that is unique for the task within the job output directory. <p>The path can be used to create custom files from within the map and reduce tasks. The path name will be unique for each task. The path parent will be the job output directory.</p> <p>This method uses the {@link #getUniqueName} method to make the file name unique for the task.</p> @param conf the configuration for the job. @param name the name for the file. @return a unique path across all tasks of the job.]]> </doc> </method> <doc> <![CDATA[A base class for {@link OutputFormat}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.FileOutputFormat --> <!-- start class org.apache.hadoop.mapred.FileSplit --> <class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit} instead."> <implements name="org.apache.hadoop.mapred.InputSplit"/> <constructor name="FileSplit" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf" static="false" final="false" visibility="public" deprecated="deprecated, no comment"> <doc> <![CDATA[Constructs a split. 
@deprecated @param file the file name @param start the position of the first byte in the file to process @param length the number of bytes in the file to process]]> </doc> </constructor> <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a split with host information @param file the file name @param start the position of the first byte in the file to process @param length the number of bytes in the file to process @param hosts the list of hosts containing the block, possibly null]]> </doc> </constructor> <constructor name="FileSplit" type="org.apache.hadoop.mapreduce.lib.input.FileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The file containing this split's data.]]> </doc> </method> <method name="getStart" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The position of the first byte in the file to process.]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The number of bytes in the file to process.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> 
<exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A section of an input file. Returned by {@link InputFormat#getSplits(JobConf, int)} and passed to {@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.FileSplit --> <!-- start class org.apache.hadoop.mapred.ID --> <class name="ID" extends="org.apache.hadoop.mapreduce.ID" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ID" type="int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[constructs an ID object from the given int]]> </doc> </constructor> <constructor name="ID" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <doc> <![CDATA[A general identifier, which internally stores the id as an integer. This is the super class of {@link JobID}, {@link TaskID} and {@link TaskAttemptID}. 
@see JobID @see TaskID @see TaskAttemptID]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.ID --> <!-- start interface org.apache.hadoop.mapred.InputFormat --> <interface name="InputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.InputFormat} instead."> <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="numSplits" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Logically split the set of input files for the job. <p>Each {@link InputSplit} is then assigned to an individual {@link Mapper} for processing.</p> <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the input files are not physically split into chunks. For example, a split could be an <i>&lt;input-file-path, start, offset&gt;</i> tuple.</p> @param job job configuration. @param numSplits the desired number of splits, a hint. @return an array of {@link InputSplit}s for the job.]]> </doc> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}. 
<p>It is the responsibility of the <code>RecordReader</code> to respect record boundaries while processing the logical split to present a record-oriented view to the individual task.</p> @param split the {@link InputSplit} @param job the job that this split belongs to @return a {@link RecordReader}]]> </doc> </method> <doc> <![CDATA[<code>InputFormat</code> describes the input-specification for a Map-Reduce job. <p>The Map-Reduce framework relies on the <code>InputFormat</code> of the job to:</p> <ol> <li> Validate the input-specification of the job. </li> <li> Split-up the input file(s) into logical {@link InputSplit}s, each of which is then assigned to an individual {@link Mapper}. </li> <li> Provide the {@link RecordReader} implementation to be used to glean input records from the logical <code>InputSplit</code> for processing by the {@link Mapper}. </li> </ol> <p>The default behavior of file-based {@link InputFormat}s, typically sub-classes of {@link FileInputFormat}, is to split the input into <i>logical</i> {@link InputSplit}s based on the total size, in bytes, of the input files. However, the {@link FileSystem} blocksize of the input files is treated as an upper bound for input splits. A lower bound on the split size can be set via <a href="{@docRoot}/../mapred-default.html#mapreduce.input.fileinputformat.split.minsize"> mapreduce.input.fileinputformat.split.minsize</a>.</p> <p>Clearly, logical splits based on input-size are insufficient for many applications since record boundaries must be respected. In such cases, the application has to also implement a {@link RecordReader} on whom lies the responsibility to respect record-boundaries and present a record-oriented view of the logical <code>InputSplit</code> to the individual task.</p> 
@see InputSplit @see RecordReader @see JobClient @see FileInputFormat @deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.InputFormat --> <!-- start interface org.apache.hadoop.mapred.InputSplit --> <interface name="InputSplit" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.InputSplit} instead."> <implements name="org.apache.hadoop.io.Writable"/> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>. @return the number of bytes in the input split. @throws IOException]]> </doc> </method> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the list of hostnames where the input split is located. @return list of hostnames where data of the <code>InputSplit</code> is located as an array of <code>String</code>s. @throws IOException]]> </doc> </method> <doc> <![CDATA[<code>InputSplit</code> represents the data to be processed by an individual {@link Mapper}. <p>Typically, it presents a byte-oriented view of the input, and it is the responsibility of the {@link RecordReader} of the job to process this and present a record-oriented view.</p> 
@see InputFormat @see RecordReader @deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.InputSplit --> <!-- start class org.apache.hadoop.mapred.InvalidFileTypeException --> <class name="InvalidFileTypeException" extends="java.io.IOException" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="InvalidFileTypeException" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="InvalidFileTypeException" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[Used when the file type differs from the desired file type, such as getting a file when a directory is expected, or getting a file of the wrong type.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.InvalidFileTypeException --> <!-- start class org.apache.hadoop.mapred.InvalidInputException --> <class name="InvalidInputException" extends="java.io.IOException" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="InvalidInputException" type="java.util.List" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create the exception with the given list. @param probs the list of problems to report. This list is not copied.]]> </doc> </constructor> <method name="getProblems" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the complete list of the problems reported. 
@return the list of problems, which must not be modified]]> </doc> </method> <method name="getMessage" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get a summary message of the problems found. @return the concatenated messages from all of the problems.]]> </doc> </method> <doc> <![CDATA[This class wraps a list of problems with the input, so that the user can get a list of problems together instead of finding and fixing them one by one.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.InvalidInputException --> <!-- start class org.apache.hadoop.mapred.InvalidJobConfException --> <class name="InvalidJobConfException" extends="java.io.IOException" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="InvalidJobConfException" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="InvalidJobConfException" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This exception is thrown when the jobconf is missing some mandatory attributes or the value of some attribute is invalid.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.InvalidJobConfException --> <!-- start class org.apache.hadoop.mapred.IsolationRunner --> <class name="IsolationRunner" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="IsolationRunner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="ClassNotFoundException" 
type="java.lang.ClassNotFoundException"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Run a single task. @param args the first argument is the task directory]]> </doc> </method> <doc> <![CDATA[IsolationRunner is intended to facilitate debugging by re-running a specific task, given left-over task files for a (typically failed) past job. Currently, it is limited to re-running map tasks. Users may coerce MapReduce to keep task files around by setting mapreduce.task.files.preserve.failedtasks. See mapred_tutorial.xml for more documentation.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.IsolationRunner --> <!-- start class org.apache.hadoop.mapred.JobClient --> <class name="JobClient" extends="org.apache.hadoop.mapreduce.tools.CLI" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link Job} and {@link Cluster} instead"> <constructor name="JobClient" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a job client.]]> </doc> </constructor> <constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Build a job client with the given {@link JobConf}, and connect to the default {@link JobTracker}. @param conf the job configuration. @throws IOException]]> </doc> </constructor> <constructor name="JobClient" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Build a job client with the given {@link Configuration}, and connect to the default {@link JobTracker}. @param conf the configuration. 
@throws IOException]]> </doc> </constructor> <constructor name="JobClient" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Build a job client, connect to the indicated job tracker. @param jobTrackAddr the job tracker to connect to. @param conf configuration.]]> </doc> </constructor> <method name="init" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Connect to the default {@link JobTracker}. @param conf the job configuration. @throws IOException]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close the <code>JobClient</code>.]]> </doc> </method> <method name="getFs" return="org.apache.hadoop.fs.FileSystem" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get a filesystem handle. We need this to prepare jobs for submission to the MapReduce system. 
@return the filesystem handle.]]> </doc> </method> <method name="getClusterHandle" return="org.apache.hadoop.mapreduce.Cluster" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get a handle to the Cluster]]> </doc> </method> <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobFile" type="java.lang.String"/> <exception name="FileNotFoundException" type="java.io.FileNotFoundException"/> <exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Submit a job to the MR system. This returns a handle to the {@link RunningJob} which can be used to track the running-job. @param jobFile the job configuration. @return a handle to the {@link RunningJob} which can be used to track the running-job. @throws FileNotFoundException @throws InvalidJobConfException @throws IOException]]> </doc> </method> <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <exception name="FileNotFoundException" type="java.io.FileNotFoundException"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Submit a job to the MR system. This returns a handle to the {@link RunningJob} which can be used to track the running-job. @param conf the job configuration. @return a handle to the {@link RunningJob} which can be used to track the running-job. 
@throws FileNotFoundException @throws IOException]]> </doc> </method> <method name="getJob" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobid" type="org.apache.hadoop.mapred.JobID"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get a {@link RunningJob} object to track an ongoing job. Returns null if the id does not correspond to any known job. @param jobid the jobid of the job. @return the {@link RunningJob} handle to track the job, null if the <code>jobid</code> doesn't correspond to any known job. @throws IOException]]> </doc> </method> <method name="getJob" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Applications should rather use {@link #getJob(JobID)}."> <param name="jobid" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]> </doc> </method> <method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the information of the current state of the map tasks of a job. @param jobId the job to query. @return the list of all of the map tips. 
@throws IOException]]> </doc> </method> <method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}"> <param name="jobId" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]> </doc> </method> <method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the information of the current state of the reduce tasks of a job. @param jobId the job to query. @return the list of all of the reduce tips. @throws IOException]]> </doc> </method> <method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the information of the current state of the cleanup tasks of a job. @param jobId the job to query. @return the list of all of the cleanup tips. 
@throws IOException]]> </doc> </method> <method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the information of the current state of the setup tasks of a job. @param jobId the job to query. @return the list of all of the setup tips. @throws IOException]]> </doc> </method> <method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}"> <param name="jobId" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]> </doc> </method> <method name="displayTasks" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobId" type="org.apache.hadoop.mapred.JobID"/> <param name="type" type="java.lang.String"/> <param name="state" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Display the information about a job's tasks, of a particular type and in a particular state @param jobId the ID of the job @param type the type of the task (map/reduce/setup/cleanup) @param state the state of the task (pending/running/completed/failed/killed)]]> </doc> </method> <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> 
<![CDATA[Get status information about the Map-Reduce cluster. @return the status information about the Map-Reduce cluster as an object of {@link ClusterStatus}. @throws IOException]]> </doc> </method> <method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="detailed" type="boolean"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get status information about the Map-Reduce cluster. @param detailed if true then get a detailed status including the tracker names @return the status information about the Map-Reduce cluster as an object of {@link ClusterStatus}. @throws IOException]]> </doc> </method> <method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the jobs that are not completed and not failed. @return array of {@link JobStatus} for the running/to-be-run jobs. @throws IOException]]> </doc> </method> <method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the jobs that are submitted. @return array of {@link JobStatus} for the submitted jobs. 
@throws IOException]]> </doc> </method> <method name="runJob" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Utility that submits a job, then polls for progress until the job is complete. @param job the job configuration. @throws IOException if the job fails]]> </doc> </method> <method name="monitorAndPrintJob" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="job" type="org.apache.hadoop.mapred.RunningJob"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Monitor a job and print status in real-time as progress is made and tasks fail. @param conf the job's configuration @param job the job to track @return true if the job succeeded @throws IOException if communication to the JobTracker fails]]> </doc> </method> <method name="setTaskOutputFilter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/> <doc> <![CDATA[Sets the output filter for tasks. only those tasks are printed whose output matches the filter. 
@param newValue task filter.]]> </doc> </method> <method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the task output filter out of the JobConf. @param job the JobConf to examine. @return the filter level.]]> </doc> </method> <method name="setTaskOutputFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/> <doc> <![CDATA[Modify the JobConf to set the task output filter. @param job the JobConf to modify. @param newValue the value to set.]]> </doc> </method> <method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns task output filter. @return task filter.]]> </doc> </method> <method name="getCounter" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="cntrs" type="org.apache.hadoop.mapreduce.Counters"/> <param name="counterGroupName" type="java.lang.String"/> <param name="counterName" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getDefaultMaps" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get status information about the max available Maps in the cluster. 
@return the max available Maps in the cluster @throws IOException]]> </doc> </method> <method name="getDefaultReduces" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get status information about the max available Reduces in the cluster. @return the max available Reduces in the cluster @throws IOException]]> </doc> </method> <method name="getSystemDir" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed. @return the system directory where job-specific files are to be placed.]]> </doc> </method> <method name="getRootQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Returns an array of queue information objects about root level queues configured @return the array of root level JobQueueInfo objects @throws IOException]]> </doc> </method> <method name="getChildQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Returns an array of queue information objects about immediate children of queue queueName. 
@param queueName @return the array of immediate children JobQueueInfo objects @throws IOException]]> </doc> </method> <method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return an array of queue information objects about all the Job Queues configured. @return Array of JobQueueInfo objects @throws IOException]]> </doc> </method> <method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Gets all the jobs which were added to particular Job Queue @param queueName name of the Job Queue @return Array of jobs present in the job queue @throws IOException]]> </doc> </method> <method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Gets the queue information associated to a particular Job Queue @param queueName name of the job queue. @return Queue information associated to particular queue. @throws IOException]]> </doc> </method> <method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapred.QueueAclsInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Gets the Queue ACLs for current user @return array of QueueAclsInfo object for current user. 
@throws IOException]]> </doc> </method> <method name="getDelegationToken" return="org.apache.hadoop.security.token.Token" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="renewer" type="org.apache.hadoop.io.Text"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get a delegation token for the user from the JobTracker. @param renewer the user who can renew the token @return the new token @throws IOException]]> </doc> </method> <method name="renewDelegationToken" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="token" type="org.apache.hadoop.security.token.Token"/> <exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Renew a delegation token @param token the token to renew @return the new expiration time of the token @throws InvalidToken @throws IOException]]> </doc> </method> <method name="cancelDelegationToken" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="token" type="org.apache.hadoop.security.token.Token"/> <exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Cancel a delegation token from the JobTracker @param token the token to cancel @throws IOException]]> </doc> </method> <method name="main" abstract="false" native="false" synchronized="false" 
static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="argv" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <doc> <![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact with the {@link JobTracker}. <code>JobClient</code> provides facilities to submit jobs, track their progress, access component-tasks' reports/logs, get the Map-Reduce cluster status information etc. <p>The job submission process involves: <ol> <li> Checking the input and output specifications of the job. </li> <li> Computing the {@link InputSplit}s for the job. </li> <li> Setting up the requisite accounting information for the {@link DistributedCache} of the job, if necessary. </li> <li> Copying the job's jar and configuration to the map-reduce system directory on the distributed file-system. </li> <li> Submitting the job to the <code>JobTracker</code> and optionally monitoring its status. </li> </ol></p> Normally the user creates the application, describes various facets of the job via {@link JobConf} and then uses the <code>JobClient</code> to submit the job and monitor its progress. <p>Here is an example on how to use <code>JobClient</code>:</p> <p><blockquote><pre> // Create a new JobConf JobConf job = new JobConf(new Configuration(), MyJob.class); // Specify various job-specific parameters job.setJobName("myjob"); job.setInputPath(new Path("in")); job.setOutputPath(new Path("out")); job.setMapperClass(MyJob.MyMapper.class); job.setReducerClass(MyJob.MyReducer.class); // Submit the job, then poll for progress until the job is complete JobClient.runJob(job); </pre></blockquote></p> <h4 id="JobControl">Job Control</h4> <p>At times clients would chain map-reduce jobs to accomplish complex tasks which cannot be done via a single map-reduce job. 
This is fairly easy since the output of the job, typically, goes to distributed file-system and that can be used as the input for the next job.</p> <p>However, this also means that the onus on ensuring jobs are complete (success/failure) lies squarely on the clients. In such situations the various job-control options are: <ol> <li> {@link #runJob(JobConf)} : submits the job and returns only after the job has completed. </li> <li> {@link #submitJob(JobConf)} : only submits the job, then poll the returned handle to the {@link RunningJob} to query status and make scheduling decisions. </li> <li> {@link JobConf#setJobEndNotificationURI(String)} : setup a notification on job-completion, thus avoiding polling. </li> </ol></p> @see JobConf @see ClusterStatus @see Tool @see DistributedCache @deprecated Use {@link Job} and {@link Cluster} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.JobClient --> <!-- start class org.apache.hadoop.mapred.JobClient.TaskStatusFilter --> <class name="JobClient.TaskStatusFilter" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapred.JobClient.TaskStatusFilter --> <!-- start class org.apache.hadoop.mapred.JobConf --> <class name="JobConf" extends="org.apache.hadoop.conf.Configuration" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link Configuration} instead"> <constructor name="JobConf" 
static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a map/reduce job configuration.]]> </doc> </constructor> <constructor name="JobConf" type="java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a map/reduce job configuration. @param exampleClass a class whose containing jar is used as the job's jar.]]> </doc> </constructor> <constructor name="JobConf" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a map/reduce job configuration. @param conf a Configuration whose settings will be inherited.]]> </doc> </constructor> <constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a map/reduce job configuration. @param conf a Configuration whose settings will be inherited. @param exampleClass a class whose containing jar is used as the job's jar.]]> </doc> </constructor> <constructor name="JobConf" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a map/reduce configuration. @param config a Configuration-format XML job description file.]]> </doc> </constructor> <constructor name="JobConf" type="org.apache.hadoop.fs.Path" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a map/reduce configuration. @param config a Configuration-format XML job description file.]]> </doc> </constructor> <constructor name="JobConf" type="boolean" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[A new map/reduce configuration where the behavior of reading from the default resources can be turned off. 
<p/> If the parameter {@code loadDefaults} is false, the new instance will not load resources from the default files. @param loadDefaults specifies whether to load from the default files]]> </doc> </constructor> <method name="getCredentials" return="org.apache.hadoop.security.Credentials" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get credentials for the job. @return credentials for the job]]> </doc> </method> <method name="getJar" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user jar for the map-reduce job. @return the user jar for the map-reduce job.]]> </doc> </method> <method name="setJar" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jar" type="java.lang.String"/> <doc> <![CDATA[Set the user jar for the map-reduce job. @param jar the user jar for the map-reduce job.]]> </doc> </method> <method name="getJarUnpackPattern" return="java.util.regex.Pattern" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the pattern for jar contents to unpack on the tasktracker]]> </doc> </method> <method name="setJarByClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <doc> <![CDATA[Set the job's jar file by finding an example class location. 
@param cls the example class.]]> </doc> </method> <method name="getLocalDirs" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="deleteLocalFiles" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Use MRAsyncDiskService.moveAndDeleteAllVolumes instead. @see org.apache.hadoop.mapreduce.util.MRAsyncDiskService#cleanupAllVolumes()]]> </doc> </method> <method name="deleteLocalFiles" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="subdir" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getLocalPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="pathString" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructs a local file name. Files are distributed among configured local directories.]]> </doc> </method> <method name="getUser" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the reported username for this job. @return the username]]> </doc> </method> <method name="setUser" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="user" type="java.lang.String"/> <doc> <![CDATA[Set the reported username for this job. 
@param user the username for this job.]]> </doc> </method> <method name="setKeepFailedTaskFiles" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="keep" type="boolean"/> <doc> <![CDATA[Set whether the framework should keep the intermediate files for failed tasks. @param keep <code>true</code> if framework should keep the intermediate files for failed tasks, <code>false</code> otherwise.]]> </doc> </method> <method name="getKeepFailedTaskFiles" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Should the temporary files for failed tasks be kept? @return should the files be kept?]]> </doc> </method> <method name="setKeepTaskFilesPattern" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="pattern" type="java.lang.String"/> <doc> <![CDATA[Set a regular expression for task names that should be kept. The regular expression ".*_m_000123_0" would keep the files for the first instance of map 123 that ran. @param pattern the java.util.regex.Pattern to match against the task names.]]> </doc> </method> <method name="getKeepTaskFilesPattern" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the regular expression that is matched against the task names to see if we need to keep the files. @return the pattern as a string, if it was set, otherwise null.]]> </doc> </method> <method name="setWorkingDirectory" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="dir" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Set the current working directory for the default file system. 
@param dir the new current working directory.]]> </doc> </method> <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the current working directory for the default file system. @return the directory name.]]> </doc> </method> <method name="setNumTasksToExecutePerJvm" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="numTasks" type="int"/> <doc> <![CDATA[Sets the number of tasks that a spawned task JVM should run before it exits @param numTasks the number of tasks to execute; defaults to 1; -1 signifies no limit]]> </doc> </method> <method name="getNumTasksToExecutePerJvm" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of tasks that a spawned JVM should execute]]> </doc> </method> <method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link InputFormat} implementation for the map-reduce job, defaults to {@link TextInputFormat} if not specified explicitly. @return the {@link InputFormat} implementation for the map-reduce job.]]> </doc> </method> <method name="setInputFormat" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link InputFormat} implementation for the map-reduce job. 
@param theClass the {@link InputFormat} implementation for the map-reduce job.]]> </doc> </method> <method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job, defaults to {@link TextOutputFormat} if not specified explicitly. @return the {@link OutputFormat} implementation for the map-reduce job.]]> </doc> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job, defaults to {@link FileOutputCommitter} if not specified explicitly. @return the {@link OutputCommitter} implementation for the map-reduce job.]]> </doc> </method> <method name="setOutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job. @param theClass the {@link OutputCommitter} implementation for the map-reduce job.]]> </doc> </method> <method name="setOutputFormat" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job. 
@param theClass the {@link OutputFormat} implementation for the map-reduce job.]]> </doc> </method> <method name="setCompressMapOutput" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="compress" type="boolean"/> <doc> <![CDATA[Should the map outputs be compressed before transfer? Uses the SequenceFile compression. @param compress should the map outputs be compressed?]]> </doc> </method> <method name="getCompressMapOutput" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Are the outputs of the maps to be compressed? @return <code>true</code> if the outputs of the maps are to be compressed, <code>false</code> otherwise.]]> </doc> </method> <method name="setMapOutputCompressorClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="codecClass" type="java.lang.Class"/> <doc> <![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs. @param codecClass the {@link CompressionCodec} class that will compress the map outputs.]]> </doc> </method> <method name="getMapOutputCompressorClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="defaultValue" type="java.lang.Class"/> <doc> <![CDATA[Get the {@link CompressionCodec} for compressing the map outputs. @param defaultValue the {@link CompressionCodec} to return if not set @return the {@link CompressionCodec} class that should be used to compress the map outputs. 
@throws IllegalArgumentException if the class was specified, but not found]]> </doc> </method> <method name="getMapOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the key class for the map output data. If it is not set, use the (final) output key class. This allows the map output key class to be different than the final output key class. @return the map output key class.]]> </doc> </method> <method name="setMapOutputKeyClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the key class for the map output data. This allows the user to specify the map output key class to be different than the final output key class. @param theClass the map output key class.]]> </doc> </method> <method name="getMapOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the value class for the map output data. If it is not set, use the (final) output value class. This allows the map output value class to be different than the final output value class. @return the map output value class.]]> </doc> </method> <method name="setMapOutputValueClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the value class for the map output data. This allows the user to specify the map output value class to be different than the final output value class. 
@param theClass the map output value class.]]> </doc> </method> <method name="getOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the key class for the job output data. @return the key class for the job output data.]]> </doc> </method> <method name="setOutputKeyClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the key class for the job output data. @param theClass the key class for the job output data.]]> </doc> </method> <method name="getOutputKeyComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link RawComparator} comparator used to compare keys. @return the {@link RawComparator} comparator used to compare keys.]]> </doc> </method> <method name="setOutputKeyComparatorClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link RawComparator} comparator used to compare keys. @param theClass the {@link RawComparator} comparator used to compare keys. @see #setOutputValueGroupingComparator(Class)]]> </doc> </method> <method name="setKeyFieldComparatorOptions" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="keySpec" type="java.lang.String"/> <doc> <![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys. 
@param keySpec the key specification of the form -k pos1[,pos2], where, pos is of the form f[.c][opts], where f is the number of the key field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field). opts are ordering options. The supported options are: -n, (Sort numerically) -r, (Reverse the result of comparison)]]> </doc> </method> <method name="getKeyFieldComparatorOption" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link KeyFieldBasedComparator} options]]> </doc> </method> <method name="setKeyFieldPartitionerOptions" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="keySpec" type="java.lang.String"/> <doc> <![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for {@link Partitioner} @param keySpec the key specification of the form -k pos1[,pos2], where, pos is of the form f[.c][opts], where f is the number of the key field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. 
If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field).]]> </doc> </method> <method name="getKeyFieldPartitionerOption" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]> </doc> </method> <method name="getOutputValueGroupingComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user defined {@link WritableComparable} comparator for grouping keys of inputs to the reduce. @return comparator set by the user for grouping values. @see #setOutputValueGroupingComparator(Class) for details.]]> </doc> </method> <method name="setOutputValueGroupingComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the user defined {@link RawComparator} comparator for grouping keys in the input to the reduce. <p>This comparator should be provided if the equivalence rules for keys for sorting the intermediates are different from those for grouping keys before each call to {@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p> <p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed in a single call to the reduce function if K1 and K2 compare as equal.</p> <p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control how keys are sorted, this can be used in conjunction to simulate <i>secondary sort on values</i>.</p> <p><i>Note</i>: This is not a guarantee of the reduce sort being <i>stable</i> in any sense. 
(In any case, with the order of available map-outputs to the reduce being non-deterministic, it wouldn't make that much sense.)</p> @param theClass the comparator class to be used for grouping keys. It should implement <code>RawComparator</code>. @see #setOutputKeyComparatorClass(Class)]]> </doc> </method> <method name="getUseNewMapper" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Should the framework use the new context-object code for running the mapper? @return true, if the new api should be used]]> </doc> </method> <method name="setUseNewMapper" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="flag" type="boolean"/> <doc> <![CDATA[Set whether the framework should use the new api for the mapper. This is the default for jobs submitted with the new Job api. @param flag true, if the new api should be used]]> </doc> </method> <method name="getUseNewReducer" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Should the framework use the new context-object code for running the reducer? @return true, if the new api should be used]]> </doc> </method> <method name="setUseNewReducer" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="flag" type="boolean"/> <doc> <![CDATA[Set whether the framework should use the new api for the reducer. This is the default for jobs submitted with the new Job api. 
@param flag true, if the new api should be used]]> </doc> </method> <method name="getOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the value class for job outputs. @return the value class for job outputs.]]> </doc> </method> <method name="setOutputValueClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the value class for job outputs. @param theClass the value class for job outputs.]]> </doc> </method> <method name="getMapperClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link Mapper} class for the job. @return the {@link Mapper} class for the job.]]> </doc> </method> <method name="setMapperClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link Mapper} class for the job. @param theClass the {@link Mapper} class for the job.]]> </doc> </method> <method name="getMapRunnerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link MapRunnable} class for the job. @return the {@link MapRunnable} class for the job.]]> </doc> </method> <method name="setMapRunnerClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Expert: Set the {@link MapRunnable} class for the job. 
Typically used to exert greater control on {@link Mapper}s. @param theClass the {@link MapRunnable} class for the job.]]> </doc> </method> <method name="getPartitionerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link Partitioner} used to partition {@link Mapper}-outputs to be sent to the {@link Reducer}s. @return the {@link Partitioner} used to partition map-outputs.]]> </doc> </method> <method name="setPartitionerClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link Partitioner} class used to partition {@link Mapper}-outputs to be sent to the {@link Reducer}s. @param theClass the {@link Partitioner} used to partition map-outputs.]]> </doc> </method> <method name="getReducerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link Reducer} class for the job. @return the {@link Reducer} class for the job.]]> </doc> </method> <method name="setReducerClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link Reducer} class for the job. @param theClass the {@link Reducer} class for the job.]]> </doc> </method> <method name="getCombinerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user-defined <i>combiner</i> class used to combine map-outputs before being sent to the reducers. 
Typically the combiner is the same as the {@link Reducer} for the job i.e. {@link #getReducerClass()}. @return the user-defined combiner class used to combine map-outputs.]]> </doc> </method> <method name="setCombinerClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the user-defined <i>combiner</i> class used to combine map-outputs before being sent to the reducers. <p>The combiner is an application-specified aggregation operation, which can help cut down the amount of data transferred between the {@link Mapper} and the {@link Reducer}, leading to better performance.</p> <p>The framework may invoke the combiner 0, 1, or multiple times, in both the mapper and reducer tasks. In general, the combiner is called as the sort/merge result is written to disk. The combiner must: <ul> <li> be side-effect free</li> <li> have the same input and output key types and the same input and output value types</li> </ul></p> <p>Typically the combiner is the same as the <code>Reducer</code> for the job i.e. {@link #setReducerClass(Class)}.</p> @param theClass the user-defined combiner class used to combine map-outputs.]]> </doc> </method> <method name="getSpeculativeExecution" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Should speculative execution be used for this job? Defaults to <code>true</code>. @return <code>true</code> if speculative execution be used for this job, <code>false</code> otherwise.]]> </doc> </method> <method name="setSpeculativeExecution" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="speculativeExecution" type="boolean"/> <doc> <![CDATA[Turn speculative execution on or off for this job. 
@param speculativeExecution <code>true</code> if speculative execution should be turned on, else <code>false</code>.]]> </doc> </method> <method name="getMapSpeculativeExecution" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Should speculative execution be used for this job for map tasks? Defaults to <code>true</code>. @return <code>true</code> if speculative execution be used for this job for map tasks, <code>false</code> otherwise.]]> </doc> </method> <method name="setMapSpeculativeExecution" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="speculativeExecution" type="boolean"/> <doc> <![CDATA[Turn speculative execution on or off for this job for map tasks. @param speculativeExecution <code>true</code> if speculative execution should be turned on for map tasks, else <code>false</code>.]]> </doc> </method> <method name="getReduceSpeculativeExecution" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Should speculative execution be used for this job for reduce tasks? Defaults to <code>true</code>. @return <code>true</code> if speculative execution be used for reduce tasks for this job, <code>false</code> otherwise.]]> </doc> </method> <method name="setReduceSpeculativeExecution" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="speculativeExecution" type="boolean"/> <doc> <![CDATA[Turn speculative execution on or off for this job for reduce tasks. 
@param speculativeExecution <code>true</code> if speculative execution should be turned on for reduce tasks, else <code>false</code>.]]> </doc> </method> <method name="getNumMapTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configured number of map tasks for this job. Defaults to <code>1</code>. @return the number of map tasks for this job.]]> </doc> </method> <method name="setNumMapTasks" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="n" type="int"/> <doc> <![CDATA[Set the number of map tasks for this job. <p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual number of spawned map tasks depends on the number of {@link InputSplit}s generated by the job's {@link InputFormat#getSplits(JobConf, int)}. A custom {@link InputFormat} is typically used to accurately control the number of map tasks for the job.</p> <h4 id="NoOfMaps">How many maps?</h4> <p>The number of maps is usually driven by the total size of the inputs i.e. total number of blocks of the input files.</p> <p>The right level of parallelism for maps seems to be around 10-100 maps per-node, although it has been set up to 300 or so for very cpu-light map tasks. Task setup takes a while, so it is best if the maps take at least a minute to execute.</p> <p>The default behavior of file-based {@link InputFormat}s is to split the input into <i>logical</i> {@link InputSplit}s based on the total size, in bytes, of input files. However, the {@link FileSystem} blocksize of the input files is treated as an upper bound for input splits. 
A lower bound on the split size can be set via <a href="{@docRoot}/../mapred-default.html#mapreduce.input.fileinputformat.split.minsize"> mapreduce.input.fileinputformat.split.minsize</a>.</p> <p>Thus, if you expect 10TB of input data and have a blocksize of 128MB, you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is used to set it even higher.</p> @param n the number of map tasks for this job. @see InputFormat#getSplits(JobConf, int) @see FileInputFormat @see FileSystem#getDefaultBlockSize() @see FileStatus#getBlockSize()]]> </doc> </method> <method name="getNumReduceTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configured number of reduce tasks for this job. Defaults to <code>1</code>. @return the number of reduce tasks for this job.]]> </doc> </method> <method name="setNumReduceTasks" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="n" type="int"/> <doc> <![CDATA[Set the requisite number of reduce tasks for this job. <h4 id="NoOfReduces">How many reduces?</h4> <p>The right number of reduces seems to be <code>0.95</code> or <code>1.75</code> multiplied by (<<i>no. of nodes</i>> * <a href="{@docRoot}/../mapred-default.html#mapreduce.tasktracker.reduce.tasks.maximum"> mapreduce.tasktracker.reduce.tasks.maximum</a>). </p> <p>With <code>0.95</code> all of the reduces can launch immediately and start transferring map outputs as the maps finish. 
With <code>1.75</code> the faster nodes will finish their first round of reduces and launch a second wave of reduces doing a much better job of load balancing.</p> <p>Increasing the number of reduces increases the framework overhead, but increases load balancing and lowers the cost of failures.</p> <p>The scaling factors above are slightly less than whole numbers to reserve a few reduce slots in the framework for speculative-tasks, failures etc.</p> <h4 id="ReducerNone">Reducer NONE</h4> <p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p> <p>In this case the output of the map-tasks directly go to distributed file-system, to the path set by {@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the framework doesn't sort the map-outputs before writing it out to HDFS.</p> @param n the number of reduce tasks for this job.]]> </doc> </method> <method name="getMaxMapAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configured number of maximum attempts that will be made to run a map task, as specified by the <code>mapreduce.map.maxattempts</code> property. If this property is not already set, the default is 4 attempts. @return the max number of attempts per map task.]]> </doc> </method> <method name="setMaxMapAttempts" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="n" type="int"/> <doc> <![CDATA[Expert: Set the number of maximum attempts that will be made to run a map task. 
@param n the number of attempts per map task.]]> </doc> </method> <method name="getMaxReduceAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configured number of maximum attempts that will be made to run a reduce task, as specified by the <code>mapreduce.reduce.maxattempts</code> property. If this property is not already set, the default is 4 attempts. @return the max number of attempts per reduce task.]]> </doc> </method> <method name="setMaxReduceAttempts" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="n" type="int"/> <doc> <![CDATA[Expert: Set the number of maximum attempts that will be made to run a reduce task. @param n the number of attempts per reduce task.]]> </doc> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user-specified job name. This is only used to identify the job to the user. @return the job's name, defaulting to "".]]> </doc> </method> <method name="setJobName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Set the user-specified job name. @param name the job's new name.]]> </doc> </method> <method name="getSessionId" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user-specified session identifier. The default is the empty string. The session identifier is used to tag metric data that is reported to some performance metrics system via the org.apache.hadoop.metrics API. 
The session identifier is intended, in particular, for use by Hadoop-On-Demand (HOD) which allocates a virtual Hadoop cluster dynamically and transiently. HOD will set the session identifier by modifying the mapred-site.xml file before starting the cluster. When not running under HOD, this identifier is expected to remain set to the empty string. @return the session identifier, defaulting to "".]]> </doc> </method> <method name="setSessionId" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="sessionId" type="java.lang.String"/> <doc> <![CDATA[Set the user-specified session identifier. @param sessionId the new session id.]]> </doc> </method> <method name="setMaxTaskFailuresPerTracker" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="noFailures" type="int"/> <doc> <![CDATA[Set the maximum no. of failures of a given job per tasktracker. If the no. of task failures exceeds <code>noFailures</code>, the tasktracker is <i>blacklisted</i> for this job. @param noFailures maximum no. of failures of a given job per tasktracker.]]> </doc> </method> <method name="getMaxTaskFailuresPerTracker" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Expert: Get the maximum no. of failures of a given job per tasktracker. If the no. of task failures exceeds this, the tasktracker is <i>blacklisted</i> for this job. @return the maximum no. of failures of a given job per tasktracker.]]> </doc> </method> <method name="getMaxMapTaskFailuresPercent" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the maximum percentage of map tasks that can fail without the job being aborted. 
Each map task is executed a minimum of {@link #getMaxMapAttempts()} attempts before being declared as <i>failed</i>. Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in the job being declared as {@link JobStatus#FAILED}. @return the maximum percentage of map tasks that can fail without the job being aborted.]]> </doc> </method> <method name="setMaxMapTaskFailuresPercent" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="percent" type="int"/> <doc> <![CDATA[Expert: Set the maximum percentage of map tasks that can fail without the job being aborted. Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts before being declared as <i>failed</i>. @param percent the maximum percentage of map tasks that can fail without the job being aborted.]]> </doc> </method> <method name="getMaxReduceTaskFailuresPercent" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the maximum percentage of reduce tasks that can fail without the job being aborted. Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()} attempts before being declared as <i>failed</i>. Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results in the job being declared as {@link JobStatus#FAILED}. @return the maximum percentage of reduce tasks that can fail without the job being aborted.]]> </doc> </method> <method name="setMaxReduceTaskFailuresPercent" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="percent" type="int"/> <doc> <![CDATA[Set the maximum percentage of reduce tasks that can fail without the job being aborted. Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()} attempts before being declared as <i>failed</i>. 
@param percent the maximum percentage of reduce tasks that can fail without the job being aborted.]]> </doc> </method> <method name="setJobPriority" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="prio" type="org.apache.hadoop.mapred.JobPriority"/> <doc> <![CDATA[Set {@link JobPriority} for this job. @param prio the {@link JobPriority} for this job.]]> </doc> </method> <method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link JobPriority} for this job. @return the {@link JobPriority} for this job.]]> </doc> </method> <method name="getProfileEnabled" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get whether the task profiling is enabled. @return true if some tasks will be profiled]]> </doc> </method> <method name="setProfileEnabled" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="newValue" type="boolean"/> <doc> <![CDATA[Set whether the system should collect profiler information for some of the tasks in this job? The information is stored in the user log directory. @param newValue true means it should be gathered]]> </doc> </method> <method name="getProfileParams" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the profiler configuration arguments. 
The default value for this property is "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s" @return the parameters to pass to the task child to configure profiling]]> </doc> </method> <method name="setProfileParams" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="value" type="java.lang.String"/> <doc> <![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it will be replaced with the name of the profiling output file when the task runs. This value is passed to the task child JVM on the command line. @param value the configuration string]]> </doc> </method> <method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="isMap" type="boolean"/> <doc> <![CDATA[Get the range of maps or reduces to profile. @param isMap is the task a map? @return the task ranges]]> </doc> </method> <method name="setProfileTaskRange" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="isMap" type="boolean"/> <param name="newValue" type="java.lang.String"/> <doc> <![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true) must also be called. @param newValue a set of integer ranges of the map ids]]> </doc> </method> <method name="setMapDebugScript" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="mDbgScript" type="java.lang.String"/> <doc> <![CDATA[Set the debug script to run when the map tasks fail. <p>The debug script can aid debugging of failed map tasks. 
The script is given task's stdout, stderr, syslog, jobconf files as arguments.</p> <p>The debug command, run on the node where the map failed, is:</p> <p><pre><blockquote> $script $stdout $stderr $syslog $jobconf. </blockquote></pre></p> <p> The script file is distributed through {@link DistributedCache} APIs. The script needs to be symlinked. </p> <p>Here is an example on how to submit a script <p><blockquote><pre> job.setMapDebugScript("./myscript"); DistributedCache.createSymlink(job); DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); </pre></blockquote></p> @param mDbgScript the script name]]> </doc> </method> <method name="getMapDebugScript" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the map task's debug script. @return the debug Script for the mapred job for failed map tasks. @see #setMapDebugScript(String)]]> </doc> </method> <method name="setReduceDebugScript" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="rDbgScript" type="java.lang.String"/> <doc> <![CDATA[Set the debug script to run when the reduce tasks fail. <p>The debug script can aid debugging of failed reduce tasks. The script is given task's stdout, stderr, syslog, jobconf files as arguments.</p> <p>The debug command, run on the node where the reduce failed, is:</p> <p><pre><blockquote> $script $stdout $stderr $syslog $jobconf. </blockquote></pre></p> <p> The script file is distributed through {@link DistributedCache} APIs. 
The script file needs to be symlinked </p> <p>Here is an example on how to submit a script <p><blockquote><pre> job.setReduceDebugScript("./myscript"); DistributedCache.createSymlink(job); DistributedCache.addCacheFile("/debug/scripts/myscript#myscript"); </pre></blockquote></p> @param rDbgScript the script name]]> </doc> </method> <method name="getReduceDebugScript" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the reduce task's debug Script @return the debug script for the mapred job for failed reduce tasks. @see #setReduceDebugScript(String)]]> </doc> </method> <method name="getJobEndNotificationURI" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the uri to be invoked in-order to send a notification after the job has completed (success/failure). @return the job end notification uri, <code>null</code> if it hasn't been set. @see #setJobEndNotificationURI(String)]]> </doc> </method> <method name="setJobEndNotificationURI" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="uri" type="java.lang.String"/> <doc> <![CDATA[Set the uri to be invoked in-order to send a notification after the job has completed (success/failure). <p>The uri can contain 2 special parameters: <tt>$jobId</tt> and <tt>$jobStatus</tt>. 
Those, if present, are replaced by the job's identifier and completion-status respectively.</p> <p>This is typically used by application-writers to implement chaining of Map-Reduce jobs in an <i>asynchronous manner</i>.</p> @param uri the job end notification uri @see JobStatus @see <a href="{@docRoot}/org/apache/hadoop/mapred/JobClient.html# JobCompletionAndChaining">Job Completion and Chaining</a>]]> </doc> </method> <method name="getJobLocalDir" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get job-specific shared directory for use as scratch space <p> When a job starts, a shared directory is created at location <code> ${mapreduce.cluster.local.dir}/taskTracker/$user/jobcache/$jobid/work/ </code>. This directory is exposed to the users through <code>mapreduce.job.local.dir </code>. So, the tasks can use this space as scratch space and share files among them. </p> This value is available as System property also. @return The localized job specific shared directory]]> </doc> </method> <method name="getMemoryForMapTask" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get memory required to run a map task of the job, in MB. If a value is specified in the configuration, it is returned. Else, it returns {@link #DISABLED_MEMORY_LIMIT}. <p/> For backward compatibility, if the job configuration sets the key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different from {@link #DISABLED_MEMORY_LIMIT}, that value will be used after converting it from bytes to MB. 
@return memory required to run a map task of the job, in MB, or {@link #DISABLED_MEMORY_LIMIT} if unset.]]> </doc> </method> <method name="setMemoryForMapTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="mem" type="long"/> </method> <method name="getMemoryForReduceTask" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get memory required to run a reduce task of the job, in MB. If a value is specified in the configuration, it is returned. Else, it returns {@link #DISABLED_MEMORY_LIMIT}. <p/> For backward compatibility, if the job configuration sets the key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different from {@link #DISABLED_MEMORY_LIMIT}, that value will be used after converting it from bytes to MB. @return memory required to run a reduce task of the job, in MB, or {@link #DISABLED_MEMORY_LIMIT} if unset.]]> </doc> </method> <method name="setMemoryForReduceTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="mem" type="long"/> </method> <method name="getQueueName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the name of the queue to which this job is submitted. Defaults to 'default'. @return name of the queue]]> </doc> </method> <method name="setQueueName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> <doc> <![CDATA[Set the name of the queue to which this job should be submitted. 
@param queueName Name of the queue]]> </doc> </method> <method name="normalizeMemoryConfigValue" return="long" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="long"/> <doc> <![CDATA[Normalize the negative values in configuration @param val @return normalized value]]> </doc> </method> <method name="getMaxVirtualMemoryForTask" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Use {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()}"> <doc> <![CDATA[Get the memory required to run a task of this job, in bytes. See {@link #MAPRED_TASK_MAXVMEM_PROPERTY} <p/> This method is deprecated. Now, different memory limits can be set for map and reduce tasks of a job, in MB. <p/> For backward compatibility, if the job configuration sets the key {@link #MAPRED_TASK_MAXVMEM_PROPERTY} to a value different from {@link #DISABLED_MEMORY_LIMIT}, that value is returned. Otherwise, this method will return the larger of the values returned by {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()} after converting them into bytes. @return Memory required to run a task of this job, in bytes, or {@link #DISABLED_MEMORY_LIMIT}, if unset. @see #setMaxVirtualMemoryForTask(long) @deprecated Use {@link #getMemoryForMapTask()} and {@link #getMemoryForReduceTask()}]]> </doc> </method> <method name="setMaxVirtualMemoryForTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Use {@link #setMemoryForMapTask(long mem)} and Use {@link #setMemoryForReduceTask(long mem)}"> <param name="vmem" type="long"/> <doc> <![CDATA[Set the maximum amount of memory any task of this job can use. 
See {@link #MAPRED_TASK_MAXVMEM_PROPERTY} <p/> mapred.task.maxvmem is split into mapreduce.map.memory.mb and mapreduce.reduce.memory.mb; each of the new keys is set to mapred.task.maxvmem / 1024, as the new values are in MB. @param vmem Maximum amount of virtual memory in bytes any task of this job can use. @see #getMaxVirtualMemoryForTask() @deprecated Use {@link #setMemoryForMapTask(long mem)} and Use {@link #setMemoryForReduceTask(long mem)}]]> </doc> </method> <method name="getMaxPhysicalMemoryForTask" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="this variable is deprecated and no longer in use."> <doc> <![CDATA[@deprecated this variable is deprecated and no longer in use.]]> </doc> </method> <method name="setMaxPhysicalMemoryForTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="mem" type="long"/> </method> <field name="MAPRED_TASK_MAXVMEM_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="Use {@link #MAPRED_JOB_MAP_MEMORY_MB_PROPERTY} and {@link #MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY}"> <doc> <![CDATA[@deprecated Use {@link #MAPRED_JOB_MAP_MEMORY_MB_PROPERTY} and {@link #MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY}]]> </doc> </field> <field name="UPPER_LIMIT_ON_TASK_VMEM_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="deprecated, no comment"> <doc> <![CDATA[@deprecated]]> </doc> </field> <field name="MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="deprecated, no comment"> <doc> <![CDATA[@deprecated]]> </doc> </field> <field name="MAPRED_TASK_MAXPMEM_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" 
final="true" visibility="public" deprecated="deprecated, no comment"> <doc> <![CDATA[@deprecated]]> </doc> </field> <field name="DISABLED_MEMORY_LIMIT" type="long" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[A value which if set for memory related configuration options, indicates that the options are turned off.]]> </doc> </field> <field name="MAPRED_LOCAL_DIR_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Property name for the configuration property mapreduce.cluster.local.dir]]> </doc> </field> <field name="DEFAULT_QUEUE_NAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Name of the queue to which jobs will be submitted, if no queue name is mentioned.]]> </doc> </field> <field name="UNPACK_JAR_PATTERN_DEFAULT" type="java.util.regex.Pattern" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Pattern for the default unpacking behavior for job jars]]> </doc> </field> <field name="MAPRED_TASK_JAVA_OPTS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}"> <doc> <![CDATA[Configuration key to set the java command line options for the child map and reduce tasks. Java opts for the task tracker child processes. The following symbol, if present, will be interpolated: @taskid@. It is replaced by current TaskID. Any other occurrences of '@' will go unchanged. 
For example, to enable verbose gc logging to a file named for the taskid in /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc The configuration variable {@link #MAPRED_TASK_ULIMIT} can be used to control the maximum virtual memory of the child processes. The configuration variable {@link #MAPRED_TASK_ENV} can be used to pass other environment variables to the child processes. @deprecated Use {@link #MAPRED_MAP_TASK_JAVA_OPTS} or {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}]]> </doc> </field> <field name="MAPRED_MAP_TASK_JAVA_OPTS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the java command line options for the map tasks. Java opts for the task tracker child map processes. The following symbol, if present, will be interpolated: @taskid@. It is replaced by current TaskID. Any other occurrences of '@' will go unchanged. For example, to enable verbose gc logging to a file named for the taskid in /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc The configuration variable {@link #MAPRED_MAP_TASK_ULIMIT} can be used to control the maximum virtual memory of the map processes. The configuration variable {@link #MAPRED_MAP_TASK_ENV} can be used to pass other environment variables to the map processes.]]> </doc> </field> <field name="MAPRED_REDUCE_TASK_JAVA_OPTS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the java command line options for the reduce tasks. Java opts for the task tracker child reduce processes. The following symbol, if present, will be interpolated: @taskid@. It is replaced by current TaskID. Any other occurrences of '@' will go unchanged. 
For example, to enable verbose gc logging to a file named for the taskid in /tmp and to set the heap maximum to be a gigabyte, pass a 'value' of: -Xmx1024m -verbose:gc -Xloggc:/tmp/@taskid@.gc The configuration variable {@link #MAPRED_REDUCE_TASK_ULIMIT} can be used to control the maximum virtual memory of the reduce processes. The configuration variable {@link #MAPRED_REDUCE_TASK_ENV} can be used to pass process environment variables to the reduce processes.]]> </doc> </field> <field name="DEFAULT_MAPRED_TASK_JAVA_OPTS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="MAPRED_TASK_ULIMIT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="Use {@link #MAPRED_MAP_TASK_ULIMIT} or {@link #MAPRED_REDUCE_TASK_ULIMIT}"> <doc> <![CDATA[Configuration key to set the maximum virtual memory available to the child map and reduce tasks (in kilo-bytes). Note: This must be greater than or equal to the -Xmx passed to the JavaVM via {@link #MAPRED_TASK_JAVA_OPTS}, else the VM might not start. @deprecated Use {@link #MAPRED_MAP_TASK_ULIMIT} or {@link #MAPRED_REDUCE_TASK_ULIMIT}]]> </doc> </field> <field name="MAPRED_MAP_TASK_ULIMIT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the maximum virtual memory available to the map tasks (in kilo-bytes). 
Note: This must be greater than or equal to the -Xmx passed to the JavaVM via {@link #MAPRED_MAP_TASK_JAVA_OPTS}, else the VM might not start.]]> </doc> </field> <field name="MAPRED_REDUCE_TASK_ULIMIT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the maximum virtual memory available to the reduce tasks (in kilo-bytes). Note: This must be greater than or equal to the -Xmx passed to the JavaVM via {@link #MAPRED_REDUCE_TASK_JAVA_OPTS}, else the VM might not start.]]> </doc> </field> <field name="MAPRED_TASK_ENV" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="Use {@link #MAPRED_MAP_TASK_ENV} or {@link #MAPRED_REDUCE_TASK_ENV}"> <doc> <![CDATA[Configuration key to set the environment of the child map/reduce tasks. The format of the value is <code>k1=v1,k2=v2</code>. Further it can reference existing environment variables via <code>$key</code>. Example: <ul> <li> A=foo - This will set the env variable A to foo. </li> <li> B=$X:c This will inherit tasktracker's X env variable. </li> </ul> @deprecated Use {@link #MAPRED_MAP_TASK_ENV} or {@link #MAPRED_REDUCE_TASK_ENV}]]> </doc> </field> <field name="MAPRED_MAP_TASK_ENV" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the environment of the child map tasks. The format of the value is <code>k1=v1,k2=v2</code>. Further it can reference existing environment variables via <code>$key</code>. Example: <ul> <li> A=foo - This will set the env variable A to foo. </li> <li> B=$X:c This will inherit tasktracker's X env variable. 
</li> </ul>]]> </doc> </field> <field name="MAPRED_REDUCE_TASK_ENV" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the environment of the child reduce tasks. The format of the value is <code>k1=v1,k2=v2</code>. Further it can reference existing environment variables via <code>$key</code>. Example: <ul> <li> A=foo - This will set the env variable A to foo. </li> <li> B=$X:c This will inherit tasktracker's X env variable. </li> </ul>]]> </doc> </field> <field name="MAPRED_MAP_TASK_LOG_LEVEL" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the logging {@link Level} for the map task. The allowed logging levels are: OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]> </doc> </field> <field name="MAPRED_REDUCE_TASK_LOG_LEVEL" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set the logging {@link Level} for the reduce task. The allowed logging levels are: OFF, FATAL, ERROR, WARN, INFO, DEBUG, TRACE and ALL.]]> </doc> </field> <field name="DEFAULT_LOG_LEVEL" type="org.apache.log4j.Level" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default logging level for map/reduce tasks.]]> </doc> </field> <doc> <![CDATA[A map/reduce job configuration. <p><code>JobConf</code> is the primary interface for a user to describe a map-reduce job to the Hadoop framework for execution. 
The framework tries to faithfully execute the job as-is described by <code>JobConf</code>, however: <ol> <li> Some configuration parameters might have been marked as <a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams"> final</a> by administrators and hence cannot be altered. </li> <li> While some job parameters are straight-forward to set (e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly with the rest of the framework and/or job-configuration and are relatively more complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}). </li> </ol></p> <p><code>JobConf</code> typically specifies the {@link Mapper}, combiner (if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and {@link OutputFormat} implementations to be used etc. <p>Optionally <code>JobConf</code> is used to specify other advanced facets of the job such as <code>Comparator</code>s to be used, files to be put in the {@link DistributedCache}, whether or not intermediate and/or job outputs are to be compressed (and how), debuggability via user-provided scripts ( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}), for doing post-processing on task logs, task's stdout, stderr, syslog. 
and etc.</p> <p>Here is an example on how to configure a job via <code>JobConf</code>:</p> <p><blockquote><pre> // Create a new JobConf JobConf job = new JobConf(new Configuration(), MyJob.class); // Specify various job-specific parameters job.setJobName("myjob"); FileInputFormat.setInputPaths(job, new Path("in")); FileOutputFormat.setOutputPath(job, new Path("out")); job.setMapperClass(MyJob.MyMapper.class); job.setCombinerClass(MyJob.MyReducer.class); job.setReducerClass(MyJob.MyReducer.class); job.setInputFormat(SequenceFileInputFormat.class); job.setOutputFormat(SequenceFileOutputFormat.class); </pre></blockquote></p> @see JobClient @see ClusterStatus @see Tool @see DistributedCache @deprecated Use {@link Configuration} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.JobConf --> <!-- start interface org.apache.hadoop.mapred.JobConfigurable --> <interface name="JobConfigurable" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Initializes a new instance from a {@link JobConf}. 
@param job the configuration]]> </doc> </method> <doc> <![CDATA[That which may be configured.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.JobConfigurable --> <!-- start interface org.apache.hadoop.mapred.JobContext --> <interface name="JobContext" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.JobContext} instead."> <implements name="org.apache.hadoop.mapreduce.JobContext"/> <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the job Configuration @return JobConf]]> </doc> </method> <method name="getProgressible" return="org.apache.hadoop.util.Progressable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the progress mechanism for reporting progress. 
@return progress mechanism]]> </doc> </method> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.JobContext} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.JobContext --> <!-- start class org.apache.hadoop.mapred.JobID --> <class name="JobID" extends="org.apache.hadoop.mapreduce.JobID" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="JobID" type="java.lang.String, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a JobID object @param jtIdentifier jobTracker identifier @param id job number]]> </doc> </constructor> <constructor name="JobID" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="downgrade" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="old" type="org.apache.hadoop.mapreduce.JobID"/> <doc> <![CDATA[Downgrade a new JobID to an old one @param old a new or old JobID @return either old or a new JobID build to match old]]> </doc> </method> <method name="read" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="forName" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="str" type="java.lang.String"/> <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> <doc> <![CDATA[Construct a JobId object from given string @return constructed JobId object or null if the given String is null @throws 
IllegalArgumentException if the given string is malformed]]> </doc> </method> <method name="getJobIDsPattern" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="jtIdentifier" type="java.lang.String"/> <param name="jobId" type="java.lang.Integer"/> <doc> <![CDATA[Returns a regex pattern which matches job IDs. Arguments can be given null, in which case that part of the regex will be generic. For example to obtain a regex matching <i>any job</i> run on the jobtracker started at <i>200707121733</i>, we would use : <pre> JobID.getJobIDsPattern("200707121733", null); </pre> which will return : <pre> "job_200707121733_[0-9]*" </pre> @param jtIdentifier jobTracker identifier, or null @param jobId job number, or null @return a regex pattern matching JobIDs]]> </doc> </method> <doc> <![CDATA[JobID represents the immutable and unique identifier for the job. JobID consists of two parts. First part represents the jobtracker identifier, so that jobID to jobtracker map is defined. For cluster setup this string is the jobtracker start time, for local setting, it is "local". Second part of the JobID is the job number. <br> An example JobID is : <code>job_200707121733_0003</code> , which represents the third job running at the jobtracker started at <code>200707121733</code>. <p> Applications should never construct or parse JobID strings, but rather use appropriate constructors or {@link #forName(String)} method. 
@see TaskID @see TaskAttemptID]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.JobID --> <!-- start class org.apache.hadoop.mapred.JobPriority --> <class name="JobPriority" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.JobPriority} instead"> <method name="values" return="org.apache.hadoop.mapred.JobPriority[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapred.JobPriority" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <doc> <![CDATA[Used to describe the priority of the running job. @deprecated Use {@link org.apache.hadoop.mapreduce.JobPriority} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.JobPriority --> <!-- start class org.apache.hadoop.mapred.JobQueueInfo --> <class name="JobQueueInfo" extends="org.apache.hadoop.mapreduce.QueueInfo" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link QueueInfo} instead"> <constructor name="JobQueueInfo" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default constructor for Job Queue Info.]]> </doc> </constructor> <constructor name="JobQueueInfo" type="java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a new JobQueueInfo object using the queue name and the scheduling information passed. 
@param queueName Name of the job queue @param schedulingInfo Scheduling Information associated with the job queue]]> </doc> </constructor> <method name="setQueueName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> <doc> <![CDATA[Set the queue name of the JobQueueInfo @param queueName Name of the job queue.]]> </doc> </method> <method name="setSchedulingInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="schedulingInfo" type="java.lang.String"/> <doc> <![CDATA[Set the scheduling information associated to particular job queue @param schedulingInfo]]> </doc> </method> <method name="setQueueState" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="state" type="java.lang.String"/> <doc> <![CDATA[Set the state of the queue @param state state of the queue.]]> </doc> </method> <method name="setChildren" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="children" type="java.util.List"/> </method> <method name="getChildren" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setProperties" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="props" type="java.util.Properties"/> </method> <method name="setJobStatuses" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="stats" type="org.apache.hadoop.mapreduce.JobStatus[]"/> </method> <doc> 
<![CDATA[Class that contains the information regarding the Job Queues which are maintained by the Hadoop Map/Reduce framework. @deprecated Use {@link QueueInfo} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.JobQueueInfo --> <!-- start class org.apache.hadoop.mapred.JobStatus --> <class name="JobStatus" extends="org.apache.hadoop.mapreduce.JobStatus" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.JobStatus} instead"> <constructor name="JobStatus" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a job status object for a given jobid. @param jobid The jobid of the job @param mapProgress The progress made on the maps @param reduceProgress The progress made on the reduces @param cleanupProgress The progress made on cleanup @param runState The current state of the job @param user userid of the person who submitted the job. @param jobName user-specified job name. @param jobFile job configuration file. @param trackingUrl link to the web-ui for details of the job.]]> </doc> </constructor> <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int, java.lang.String, java.lang.String, java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a job status object for a given jobid. @param jobid The jobid of the job @param mapProgress The progress made on the maps @param reduceProgress The progress made on the reduces @param runState The current state of the job @param user userid of the person who submitted the job. @param jobName user-specified job name. 
@param jobFile job configuration file. @param trackingUrl link to the web-ui for details of the job.]]> </doc> </constructor> <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a job status object for a given jobid. @param jobid The jobid of the job @param mapProgress The progress made on the maps @param reduceProgress The progress made on the reduces @param runState The current state of the job @param jp Priority of the job. @param user userid of the person who submitted the job. @param jobName user-specified job name. @param jobFile job configuration file. @param trackingUrl link to the web-ui for details of the job.]]> </doc> </constructor> <constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a job status object for a given jobid. @param jobid The jobid of the job @param setupProgress The progress made on the setup @param mapProgress The progress made on the maps @param reduceProgress The progress made on the reduces @param cleanupProgress The progress made on the cleanup @param runState The current state of the job @param jp Priority of the job. @param user userid of the person who submitted the job. @param jobName user-specified job name. @param jobFile job configuration file. 
@param trackingUrl link to the web-ui for details of the job.]]> </doc> </constructor> <method name="getJobRunState" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="state" type="int"/> <doc> <![CDATA[Helper method to get human-readable state of the job. @param state job state @return human-readable state of the job]]> </doc> </method> <method name="downgrade" return="org.apache.hadoop.mapred.JobStatus" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="stat" type="org.apache.hadoop.mapreduce.JobStatus"/> </method> <method name="getJobId" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="use getJobID instead"> <doc> <![CDATA[@deprecated use getJobID instead]]> </doc> </method> <method name="getJobID" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return The jobid of the Job]]> </doc> </method> <method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the priority of the job @return job priority]]> </doc> </method> <method name="setMapProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the map progress of this job @param p The value of map progress to set to]]> </doc> </method> <method name="setCleanupProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" 
deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the cleanup progress of this job @param p The value of cleanup progress to set to]]> </doc> </method> <method name="setSetupProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the setup progress of this job @param p The value of setup progress to set to]]> </doc> </method> <method name="setReduceProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the reduce progress of this Job @param p The value of reduce progress to set to]]> </doc> </method> <method name="setFinishTime" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="finishTime" type="long"/> <doc> <![CDATA[Set the finish time of the job @param finishTime The finishTime of the job]]> </doc> </method> <method name="setHistoryFile" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="historyFile" type="java.lang.String"/> <doc> <![CDATA[Set the job history file url for a completed job]]> </doc> </method> <method name="setTrackingUrl" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="trackingUrl" type="java.lang.String"/> <doc> <![CDATA[Set the link to the web-ui for details of the job.]]> </doc> </method> <method name="setRetired" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Set the job retire flag to true.]]> </doc> </method> <method name="setRunState" abstract="false" 
native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="state" type="int"/> <doc> <![CDATA[Change the current run state of the job.]]> </doc> </method> <method name="getRunState" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return running state of the job]]> </doc> </method> <method name="setStartTime" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="startTime" type="long"/> <doc> <![CDATA[Set the start time of the job @param startTime The startTime of the job]]> </doc> </method> <method name="setUsername" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="userName" type="java.lang.String"/> <doc> <![CDATA[@param userName The username of the job]]> </doc> </method> <method name="setSchedulingInfo" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="schedulingInfo" type="java.lang.String"/> <doc> <![CDATA[Used to set the scheduling information associated to a particular Job. @param schedulingInfo Scheduling information of the job]]> </doc> </method> <method name="setJobACLs" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="acls" type="java.util.Map"/> </method> <method name="setJobPriority" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jp" type="org.apache.hadoop.mapred.JobPriority"/> <doc> <![CDATA[Set the priority of the job, defaulting to NORMAL. 
@param jp new job priority]]> </doc> </method> <method name="mapProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in maps]]> </doc> </method> <method name="cleanupProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in cleanup]]> </doc> </method> <method name="setupProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in setup]]> </doc> </method> <method name="reduceProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in reduce]]> </doc> </method> <field name="RUNNING" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="SUCCEEDED" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="FAILED" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="PREP" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="KILLED" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[Describes the current status of a job. This is not intended to be a comprehensive piece of data. For that, look at JobProfile. 
@deprecated Use {@link org.apache.hadoop.mapreduce.JobStatus} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.JobStatus --> <!-- start class org.apache.hadoop.mapred.KeyValueLineRecordReader --> <class name="KeyValueLineRecordReader" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader} instead"> <implements name="org.apache.hadoop.mapred.RecordReader"/> <constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="getKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="createKey" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="createValue" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="findSeparator" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="utf" type="byte[]"/> <param name="start" type="int"/> <param name="length" type="int"/> <param name="sep" type="byte"/> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="value" type="org.apache.hadoop.io.Text"/> <exception name="IOException" 
type="java.io.IOException"/> <doc> <![CDATA[Read key/value pair in a line.]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[This class treats a line in the input as a key/value pair separated by a separator character. The separator can be specified in config file under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. The default separator is the tab character ('\t'). 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.KeyValueLineRecordReader --> <!-- start class org.apache.hadoop.mapred.KeyValueTextInputFormat --> <class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat} instead"> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <constructor name="KeyValueTextInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="isSplitable" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="file" type="org.apache.hadoop.fs.Path"/> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. Either linefeed or carriage-return are used to signal end of line. Each line is divided into key and value parts by a separator byte. 
If no such a byte exists, the key will be the entire line and value will be empty. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.KeyValueTextInputFormat --> <!-- start class org.apache.hadoop.mapred.LineRecordReader.LineReader --> <class name="LineRecordReader.LineReader" extends="org.apache.hadoop.util.LineReader" abstract="false" static="true" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.util.LineReader} instead."> <constructor name="LineRecordReader.LineReader" type="java.io.InputStream, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <doc> <![CDATA[A class that provides a line reader from an input stream. @deprecated Use {@link org.apache.hadoop.util.LineReader} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.LineRecordReader.LineReader --> <!-- start class org.apache.hadoop.mapred.MapFileOutputFormat --> <class name="MapFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat} instead"> <constructor name="MapFileOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" 
type="java.io.IOException"/> </method> <method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="dir" type="org.apache.hadoop.fs.Path"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Open the output generated by this format.]]> </doc> </method> <method name="getEntry" return="org.apache.hadoop.io.Writable" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/> <param name="partitioner" type="org.apache.hadoop.mapred.Partitioner"/> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get an entry from output generated by this class.]]> </doc> </method> <doc> <![CDATA[An {@link OutputFormat} that writes {@link MapFile}s. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.MapFileOutputFormat --> <!-- start interface org.apache.hadoop.mapred.Mapper --> <interface name="Mapper" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead."> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <implements name="org.apache.hadoop.io.Closeable"/> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K1"/> <param name="value" type="V1"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Maps a single input key/value pair into an intermediate key/value pair. <p>Output pairs need not be of the same types as input pairs. A given input pair may map to zero or many output pairs. Output pairs are collected with calls to {@link OutputCollector#collect(Object,Object)}.</p> <p>Applications can use the {@link Reporter} provided to report progress or just indicate that they are alive. In scenarios where the application takes a significant amount of time to process individual key/value pairs, this is crucial since the framework might assume that the task has timed-out and kill that task. The other way of avoiding this is to set <a href="{@docRoot}/../mapred-default.html#mapreduce.task.timeout"> mapreduce.task.timeout</a> to a high-enough value (or even zero for no time-outs).</p> @param key the input key. @param value the input value. @param output collects mapped keys and values. @param reporter facility to report progress.]]> </doc> </method> <doc> <![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs. 
<p>Maps are the individual tasks which transform input records into intermediate records. The transformed intermediate records need not be of the same type as the input records. A given input pair may map to zero or many output pairs.</p> <p>The Hadoop Map-Reduce framework spawns one map task for each {@link InputSplit} generated by the {@link InputFormat} for the job. <code>Mapper</code> implementations can access the {@link JobConf} for the job via the {@link JobConfigurable#configure(JobConf)} and initialize themselves. Similarly they can use the {@link Closeable#close()} method for de-initialization.</p> <p>The framework then calls {@link #map(Object, Object, OutputCollector, Reporter)} for each key/value pair in the <code>InputSplit</code> for that task.</p> <p>All intermediate values associated with a given output key are subsequently grouped by the framework, and passed to a {@link Reducer} to determine the final output. Users can control the grouping by specifying a <code>Comparator</code> via {@link JobConf#setOutputKeyComparatorClass(Class)}.</p> <p>The grouped <code>Mapper</code> outputs are partitioned per <code>Reducer</code>. Users can control which keys (and hence records) go to which <code>Reducer</code> by implementing a custom {@link Partitioner}.</p> <p>Users can optionally specify a <code>combiner</code>, via {@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the intermediate outputs, which helps to cut down the amount of data transferred from the <code>Mapper</code> to the <code>Reducer</code>.</p> <p>The intermediate, grouped outputs are always stored in {@link SequenceFile}s. 
Applications can specify if and how the intermediate outputs are to be compressed and which {@link CompressionCodec}s are to be used via the <code>JobConf</code>.</p> <p>If the job has <a href="{@docRoot}/org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero reduces</a> then the output of the <code>Mapper</code> is directly written to the {@link FileSystem} without grouping by keys.</p> <p>Example:</p> <p><blockquote><pre> public class MyMapper<K extends WritableComparable, V extends Writable> extends MapReduceBase implements Mapper<K, V, K, V> { static enum MyCounters { NUM_RECORDS } private String mapTaskId; private String inputFile; private int noRecords = 0; public void configure(JobConf job) { mapTaskId = job.get(JobContext.TASK_ATTEMPT_ID); inputFile = job.get(JobContext.MAP_INPUT_FILE); } public void map(K key, V val, OutputCollector<K, V> output, Reporter reporter) throws IOException { // Process the <key, value> pair (assume this takes a while) // ... // ... // Let the framework know that we are alive, and kicking! // reporter.progress(); // Process some more // ... // ... // Increment the no. of <key, value> pairs processed ++noRecords; // Increment counters reporter.incrCounter(NUM_RECORDS, 1); // Every 100 records update application-level status if ((noRecords%100) == 0) { reporter.setStatus(mapTaskId + " processed " + noRecords + " from input-file: " + inputFile); } // Output the result output.collect(key, val); } } </pre></blockquote></p> <p>Applications may write a custom {@link MapRunnable} to exert greater control on map processing e.g. 
multi-threaded <code>Mapper</code>s etc.</p> @see JobConf @see InputFormat @see Partitioner @see Reducer @see MapReduceBase @see MapRunnable @see SequenceFile @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.Mapper --> <!-- start class org.apache.hadoop.mapred.MapReduceBase --> <class name="MapReduceBase" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Closeable"/> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <constructor name="MapReduceBase" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Default implementation that does nothing.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Default implementation that does nothing.]]> </doc> </method> <doc> <![CDATA[Base class for {@link Mapper} and {@link Reducer} implementations. 
<p>Provides default no-op implementations for a few methods; most non-trivial applications need to override some of them.</p>]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.MapReduceBase --> <!-- start interface org.apache.hadoop.mapred.MapRunnable --> <interface name="MapRunnable" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead."> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Start mapping input <tt>&lt;key, value&gt;</tt> pairs. <p>Mapping of input records to output records is complete when this method returns.</p> @param input the {@link RecordReader} to read the input records. @param output the {@link OutputCollector} to collect the output records. @param reporter {@link Reporter} to report progress, status-updates etc. @throws IOException]]> </doc> </method> <doc> <![CDATA[Expert: Generic interface for {@link Mapper}s. <p>Custom implementations of <code>MapRunnable</code> can exert greater control on map processing e.g. 
multi-threaded, asynchronous mappers etc.</p> @see Mapper @deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.MapRunnable --> <!-- start class org.apache.hadoop.mapred.MapRunner --> <class name="MapRunner" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.MapRunnable"/> <constructor name="MapRunner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getMapper" return="org.apache.hadoop.mapred.Mapper" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <doc> <![CDATA[Default {@link MapRunnable} implementation.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.MapRunner --> <!-- start class org.apache.hadoop.mapred.MultiFileInputFormat --> <class name="MultiFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead"> <constructor name="MultiFileInputFormat" static="false" final="false" visibility="public" deprecated="not 
deprecated"> </constructor> <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="numSplits" type="int"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An abstract {@link InputFormat} that returns {@link MultiFileSplit}'s in {@link #getSplits(JobConf, int)} method. Splits are constructed from the files under the input paths. Each split returned contains <i>nearly</i> equal content length. <br> Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)} to construct <code>RecordReader</code>'s for <code>MultiFileSplit</code>'s. 
@see MultiFileSplit @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.MultiFileInputFormat --> <!-- start class org.apache.hadoop.mapred.MultiFileSplit --> <class name="MultiFileSplit" extends="org.apache.hadoop.mapred.lib.CombineFileSplit" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead"> <constructor name="MultiFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit class does not represent a split of a file, but a split of input files into smaller sets. The atomic unit of split is a file. <br> MultiFileSplit can be used to implement {@link RecordReader}'s, with reading one record per file. 
@see FileSplit @see MultiFileInputFormat @deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.MultiFileSplit --> <!-- start interface org.apache.hadoop.mapred.OutputCollector --> <interface name="OutputCollector" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="collect" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Adds a key/value pair to the output. @param key the key to collect. @param value the value to collect. @throws IOException]]> </doc> </method> <doc> <![CDATA[Collects the <code>&lt;key, value&gt;</code> pairs output by {@link Mapper}s and {@link Reducer}s. <p><code>OutputCollector</code> is the generalization of the facility provided by the Map-Reduce framework to collect data output by either the <code>Mapper</code> or the <code>Reducer</code> i.e. 
intermediate outputs or the output of the job.</p>]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.OutputCollector --> <!-- start class org.apache.hadoop.mapred.OutputCommitter --> <class name="OutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead."> <constructor name="OutputCommitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setupJob" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For the framework to setup the job output during initialization @param jobContext Context of the job whose output is being written. @throws IOException if temporary output could not be created]]> </doc> </method> <method name="cleanupJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Use {@link #commitJob(JobContext)} or {@link #abortJob(JobContext, int)} instead."> <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For cleaning up the job's output after job completion @param jobContext Context of the job whose output is being written. 
@throws IOException @deprecated Use {@link #commitJob(JobContext)} or {@link #abortJob(JobContext, int)} instead.]]> </doc> </method> <method name="commitJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For committing job's output after successful job completion. Note that this is invoked for jobs with final runstate as SUCCESSFUL. @param jobContext Context of the job whose output is being written. @throws IOException]]> </doc> </method> <method name="abortJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/> <param name="status" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for jobs with final runstate as {@link JobStatus#FAILED} or {@link JobStatus#KILLED} @param jobContext Context of the job whose output is being written. @param status final runstate of the job @throws IOException]]> </doc> </method> <method name="setupTask" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Sets up output for the task. @param taskContext Context of the task whose output is being written. 
@throws IOException]]> </doc> </method> <method name="needsTaskCommit" return="boolean" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Check whether task needs a commit @param taskContext @return true/false @throws IOException]]> </doc> </method> <method name="commitTask" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[To promote the task's temporary output to the final output location. The task's output is moved to the job's output directory. @param taskContext Context of the task whose output is being written. @throws IOException if commit is not successful]]> </doc> </method> <method name="abortTask" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Discard the task output @param taskContext @throws IOException]]> </doc> </method> <method name="setupJob" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. 
Note that the input types are different between the new and old apis and this is a bridge between the two.]]> </doc> </method> <method name="cleanupJob" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)} or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)} instead."> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. Note that the input types are different between the new and old apis and this is a bridge between the two. @deprecated Use {@link #commitJob(org.apache.hadoop.mapreduce.JobContext)} or {@link #abortJob(org.apache.hadoop.mapreduce.JobContext, org.apache.hadoop.mapreduce.JobStatus.State)} instead.]]> </doc> </method> <method name="commitJob" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. Note that the input types are different between the new and old apis and this is a bridge between the two.]]> </doc> </method> <method name="abortJob" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="runState" type="org.apache.hadoop.mapreduce.JobStatus.State"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. 
Note that the input types are different between the new and old apis and this is a bridge between the two.]]> </doc> </method> <method name="setupTask" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. Note that the input types are different between the new and old apis and this is a bridge between the two.]]> </doc> </method> <method name="needsTaskCommit" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. Note that the input types are different between the new and old apis and this is a bridge between the two.]]> </doc> </method> <method name="commitTask" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. 
Note that the input types are different between the new and old apis and this is a bridge between the two.]]> </doc> </method> <method name="abortTask" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method implements the new interface by calling the old method. Note that the input types are different between the new and old apis and this is a bridge between the two.]]> </doc> </method> <doc> <![CDATA[<code>OutputCommitter</code> describes the commit of task output for a Map-Reduce job. <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of the job to:<p> <ol> <li> Setup the job during initialization. For example, create the temporary output directory for the job during the initialization of the job. </li> <li> Cleanup the job after the job completion. For example, remove the temporary output directory after the job completion. </li> <li> Setup the task temporary output. </li> <li> Check whether a task needs a commit. This is to avoid the commit procedure if a task does not need commit. </li> <li> Commit of the task output. </li> <li> Discard the task commit. 
</li> </ol> @see FileOutputCommitter @see JobContext @see TaskAttemptContext @deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.OutputCommitter --> <!-- start interface org.apache.hadoop.mapred.OutputFormat --> <interface name="OutputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead."> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the {@link RecordWriter} for the given job. @param ignored @param job configuration for the job whose output is being written. @param name the unique name for this part of the output. @param progress mechanism for reporting progress while writing to file. @return a {@link RecordWriter} to write the output for the job. @throws IOException]]> </doc> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Check for validity of the output-specification for the job. <p>This is to validate the output specification for the job when the job is submitted. 
Typically checks that it does not already exist, throwing an exception when it already exists, so that output is not overwritten.</p> @param ignored @param job job configuration. @throws IOException when output should not be attempted]]> </doc> </method> <doc> <![CDATA[<code>OutputFormat</code> describes the output-specification for a Map-Reduce job. <p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the job to:</p> <ol> <li> Validate the output-specification of the job. For example, check that the output directory doesn't already exist. </li> <li> Provide the {@link RecordWriter} implementation to be used to write out the output files of the job. Output files are stored in a {@link FileSystem}. </li> </ol> @see RecordWriter @see JobConf @deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.OutputFormat --> <!-- start class org.apache.hadoop.mapred.OutputLogFilter --> <class name="OutputLogFilter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter} instead."> <implements name="org.apache.hadoop.fs.PathFilter"/> <constructor name="OutputLogFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> </method> <doc> <![CDATA[This class filters log files from the given directory. It doesn't accept paths having _logs. 
This can be used to list paths of the output directory as follows: Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, new OutputLogFilter())); @deprecated Use {@link org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.OutputLogFilter --> <!-- start interface org.apache.hadoop.mapred.Partitioner --> <interface name="Partitioner" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Partitioner} instead."> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <method name="getPartition" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K2"/> <param name="value" type="V2"/> <param name="numPartitions" type="int"/> <doc> <![CDATA[Get the partition number for a given key (hence record) given the total number of partitions i.e. number of reduce-tasks for the job. <p>Typically a hash function on all or a subset of the key.</p> @param key the key to be partitioned. @param value the entry value. @param numPartitions the total number of partitions. @return the partition number for the <code>key</code>.]]> </doc> </method> <doc> <![CDATA[Partitions the key space. <p><code>Partitioner</code> controls the partitioning of the keys of the intermediate map-outputs. The key (or a subset of the key) is used to derive the partition, typically by a hash function. The total number of partitions is the same as the number of reduce tasks for the job. 
Hence this controls which of the <code>m</code> reduce tasks the intermediate key (and hence the record) is sent for reduction.</p> @see Reducer @deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.Partitioner --> <!-- start interface org.apache.hadoop.mapred.RecordReader --> <interface name="RecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Reads the next key/value pair from the input for processing. @param key the key to read data into @param value the value to read data into @return true iff a key/value was read, false if at EOF]]> </doc> </method> <method name="createKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create an object of the appropriate type to be used as a key. @return a new key object.]]> </doc> </method> <method name="createValue" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create an object of the appropriate type to be used as a value. @return a new value object.]]> </doc> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Returns the current position in the input. @return the current position in the input. 
@throws IOException]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close this {@link RecordReader} to future operations. @throws IOException]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[How much of the input has the {@link RecordReader} consumed, i.e. how much has been processed? @return progress from <code>0.0</code> to <code>1.0</code>. @throws IOException]]> </doc> </method> <doc> <![CDATA[<code>RecordReader</code> reads &lt;key, value&gt; pairs from an {@link InputSplit}. <p><code>RecordReader</code>, typically, converts the byte-oriented view of the input, provided by the <code>InputSplit</code>, and presents a record-oriented view for the {@link Mapper} &amp; {@link Reducer} tasks for processing. It thus assumes the responsibility of processing record boundaries and presenting the tasks with keys and values.</p> @see InputSplit @see InputFormat]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.RecordReader --> <!-- start interface org.apache.hadoop.mapred.RecordWriter --> <interface name="RecordWriter" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Writes a key/value pair. @param key the key to write. @param value the value to write. 
@throws IOException]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close this <code>RecordWriter</code> to future operations. @param reporter facility to report progress. @throws IOException]]> </doc> </method> <doc> <![CDATA[<code>RecordWriter</code> writes the output <key, value> pairs to an output file. <p><code>RecordWriter</code> implementations write the job outputs to the {@link FileSystem}. @see OutputFormat]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.RecordWriter --> <!-- start interface org.apache.hadoop.mapred.Reducer --> <interface name="Reducer" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead."> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <implements name="org.apache.hadoop.io.Closeable"/> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K2"/> <param name="values" type="java.util.Iterator"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[<i>Reduces</i> values for a given key. <p>The framework calls this method for each <code><key, (list of values)></code> pair in the grouped inputs. Output values must be of the same type as input values. Input keys must not be altered. The framework will <b>reuse</b> the key and value objects that are passed into the reduce, therefore the application should clone the objects they want to keep a copy of. 
In many cases, all values are combined into zero or one value. </p> <p>Output pairs are collected with calls to {@link OutputCollector#collect(Object,Object)}.</p> <p>Applications can use the {@link Reporter} provided to report progress or just indicate that they are alive. In scenarios where the application takes a significant amount of time to process individual key/value pairs, this is crucial since the framework might assume that the task has timed-out and kill that task. The other way of avoiding this is to set <a href="{@docRoot}/../mapred-default.html#mapreduce.task.timeout"> mapreduce.task.timeout</a> to a high-enough value (or even zero for no time-outs).</p> @param key the key. @param values the list of values to reduce. @param output to collect keys and combined values. @param reporter facility to report progress.]]> </doc> </method> <doc> <![CDATA[Reduces a set of intermediate values which share a key to a smaller set of values. <p>The number of <code>Reducer</code>s for the job is set by the user via {@link JobConf#setNumReduceTasks(int)}. <code>Reducer</code> implementations can access the {@link JobConf} for the job via the {@link JobConfigurable#configure(JobConf)} method and initialize themselves. Similarly they can use the {@link Closeable#close()} method for de-initialization.</p> <p><code>Reducer</code> has 3 primary phases:</p> <ol> <li> <h4 id="Shuffle">Shuffle</h4> <p><code>Reducer</code> takes as input the grouped output of a {@link Mapper}. In this phase the framework, for each <code>Reducer</code>, fetches the relevant partition of the output of all the <code>Mapper</code>s, via HTTP. </p> </li> <li> <h4 id="Sort">Sort</h4> <p>The framework groups <code>Reducer</code> inputs by <code>key</code>s (since different <code>Mapper</code>s may have output the same key) in this stage.</p> <p>The shuffle and sort phases occur simultaneously i.e. 
while outputs are being fetched they are merged.</p> <h5 id="SecondarySort">SecondarySort</h5> <p>If equivalence rules for keys while grouping the intermediates are different from those for grouping keys before reduction, then one may specify a <code>Comparator</code> via {@link JobConf#setOutputValueGroupingComparator(Class)}. Since {@link JobConf#setOutputKeyComparatorClass(Class)} can be used to control how intermediate keys are grouped, these can be used in conjunction to simulate <i>secondary sort on values</i>.</p> For example, say that you want to find duplicate web pages and tag them all with the url of the "best" known example. You would set up the job like: <ul> <li>Map Input Key: url</li> <li>Map Input Value: document</li> <li>Map Output Key: document checksum, url pagerank</li> <li>Map Output Value: url</li> <li>Partitioner: by checksum</li> <li>OutputKeyComparator: by checksum and then decreasing pagerank</li> <li>OutputValueGroupingComparator: by checksum</li> </ul> </li> <li> <h4 id="Reduce">Reduce</h4> <p>In this phase the {@link #reduce(Object, Iterator, OutputCollector, Reporter)} method is called for each <code><key, (list of values)></code> pair in the grouped inputs.</p> <p>The output of the reduce task is typically written to the {@link FileSystem} via {@link OutputCollector#collect(Object, Object)}.</p> </li> </ol> <p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p> <p>Example:</p> <p><blockquote><pre> public class MyReducer<K extends WritableComparable, V extends Writable> extends MapReduceBase implements Reducer<K, V, K, V> { static enum MyCounters { NUM_RECORDS } private String reduceTaskId; private int noKeys = 0; public void configure(JobConf job) { reduceTaskId = job.get(JobContext.TASK_ATTEMPT_ID); } public void reduce(K key, Iterator<V> values, OutputCollector<K, V> output, Reporter reporter) throws IOException { // Process int noValues = 0; while (values.hasNext()) { V value = values.next(); // Increment the no. 
of values for this key ++noValues; // Process the <key, value> pair (assume this takes a while) // ... // ... // Let the framework know that we are alive, and kicking! if ((noValues%10) == 0) { reporter.progress(); } // Process some more // ... // ... // Output the <key, value> output.collect(key, value); } // Increment the no. of <key, list of values> pairs processed ++noKeys; // Increment counters reporter.incrCounter(MyCounters.NUM_RECORDS, 1); // Every 100 keys update application-level status if ((noKeys%100) == 0) { reporter.setStatus(reduceTaskId + " processed " + noKeys); } } } </pre></blockquote></p> @see Mapper @see Partitioner @see Reporter @see MapReduceBase @deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.Reducer --> <!-- start interface org.apache.hadoop.mapred.Reporter --> <interface name="Reporter" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Progressable"/> <method name="setStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="status" type="java.lang.String"/> <doc> <![CDATA[Set the status description for the task. @param status brief description of the current status.]]> </doc> </method> <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.Enum"/> <doc> <![CDATA[Get the {@link Counter} of the given group with the given name. 
@param name counter name @return the <code>Counter</code> of the given group/name.]]> </doc> </method> <method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="group" type="java.lang.String"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Get the {@link Counter} of the given group with the given name. @param group counter group @param name counter name @return the <code>Counter</code> of the given group/name.]]> </doc> </method> <method name="incrCounter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Enum"/> <param name="amount" type="long"/> <doc> <![CDATA[Increments the counter identified by the key, which can be of any {@link Enum} type, by the specified amount. @param key key to identify the counter to be incremented. The key can be any <code>Enum</code>. @param amount A non-negative amount by which the counter is to be incremented.]]> </doc> </method> <method name="incrCounter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="group" type="java.lang.String"/> <param name="counter" type="java.lang.String"/> <param name="amount" type="long"/> <doc> <![CDATA[Increments the counter identified by the group and counter name by the specified amount. @param group name to identify the group of the counter to be incremented. @param counter name to identify the counter within the group. 
@param amount A non-negative amount by which the counter is to be incremented.]]> </doc> </method> <method name="getInputSplit" return="org.apache.hadoop.mapred.InputSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="UnsupportedOperationException" type="java.lang.UnsupportedOperationException"/> <doc> <![CDATA[Get the {@link InputSplit} object for a map. @return the <code>InputSplit</code> that the map is reading from. @throws UnsupportedOperationException if called outside a mapper]]> </doc> </method> <field name="NULL" type="org.apache.hadoop.mapred.Reporter" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[A constant of Reporter type that does nothing.]]> </doc> </field> <doc> <![CDATA[A facility for Map-Reduce applications to report progress and update counters, status information etc. <p>{@link Mapper} and {@link Reducer} can use the <code>Reporter</code> provided to report progress or just indicate that they are alive. In scenarios where the application takes a significant amount of time to process individual key/value pairs, this is crucial since the framework might assume that the task has timed-out and kill that task.</p> 
<p>Applications can also update {@link Counters} via the provided <code>Reporter</code> .</p> @see Progressable @see Counters]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.Reporter --> <!-- start interface org.apache.hadoop.mapred.RunningJob --> <interface name="RunningJob" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Job} instead"> <method name="getConfiguration" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the underlying job configuration @return the configuration of the job.]]> </doc> </method> <method name="getID" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the job identifier. @return the job identifier.]]> </doc> </method> <method name="getJobID" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="This method is deprecated and will be removed. Applications should rather use {@link #getID()}."> <doc> <![CDATA[@deprecated This method is deprecated and will be removed. Applications should rather use {@link #getID()}.]]> </doc> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the name of the job. @return the name of the job.]]> </doc> </method> <method name="getJobFile" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the path of the submitted job configuration. 
@return the path of the submitted job configuration.]]> </doc> </method> <method name="getTrackingURL" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the URL where some job progress information will be displayed. @return the URL where some job progress information will be displayed.]]> </doc> </method> <method name="mapProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0 and 1.0. When all map tasks have completed, the function returns 1.0. @return the progress of the job's map-tasks. @throws IOException]]> </doc> </method> <method name="reduceProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0 and 1.0. When all reduce tasks have completed, the function returns 1.0. @return the progress of the job's reduce-tasks. @throws IOException]]> </doc> </method> <method name="cleanupProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0 and 1.0. When all cleanup tasks have completed, the function returns 1.0. @return the progress of the job's cleanup-tasks. 
@throws IOException]]> </doc> </method> <method name="setupProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0 and 1.0. When all setup tasks have completed, the function returns 1.0. @return the progress of the job's setup-tasks. @throws IOException]]> </doc> </method> <method name="isComplete" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Check if the job is finished or not. This is a non-blocking call. @return <code>true</code> if the job is complete, else <code>false</code>. @throws IOException]]> </doc> </method> <method name="isSuccessful" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Check if the job completed successfully. @return <code>true</code> if the job succeeded, else <code>false</code>. @throws IOException]]> </doc> </method> <method name="waitForCompletion" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Blocks until the job is complete. @throws IOException]]> </doc> </method> <method name="getJobState" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Returns the current state of the Job. 
{@link JobStatus} @throws IOException]]> </doc> </method> <method name="killJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Kill the running job. Blocks until all job tasks have been killed as well. If the job is no longer running, it simply returns. @throws IOException]]> </doc> </method> <method name="setJobPriority" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="priority" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Set the priority of a running job. @param priority the new priority for the job. @throws IOException]]> </doc> </method> <method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="startFrom" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get events indicating completion (success/failure) of component tasks. @param startFrom index to start fetching events from @return an array of {@link TaskCompletionEvent}s @throws IOException]]> </doc> </method> <method name="killTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> <param name="shouldFail" type="boolean"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Kill indicated task attempt. @param taskId the id of the task to be terminated. @param shouldFail if true the task is failed and added to failed tasks list, otherwise it is just killed, w/o affecting job failure status. 
@throws IOException]]> </doc> </method> <method name="killTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Applications should rather use {@link #killTask(TaskAttemptID, boolean)}"> <param name="taskId" type="java.lang.String"/> <param name="shouldFail" type="boolean"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}]]> </doc> </method> <method name="getCounters" return="org.apache.hadoop.mapred.Counters" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Gets the counters for this job. @return the counters for this job. @throws IOException]]> </doc> </method> <method name="getTaskDiagnostics" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Gets the diagnostic messages for a given task attempt. @param taskid @return the list of diagnostic messages for the task @throws IOException]]> </doc> </method> <method name="getHistoryUrl" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the url where history file is archived. Returns empty string if history file is not available yet. 
@return the url where history file is archived @throws IOException]]> </doc> </method> <method name="isRetired" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Check whether the job has been removed from JobTracker memory and retired. On retire, the job history file is copied to a location known by {@link #getHistoryUrl()} @return <code>true</code> if the job retired, else <code>false</code>. @throws IOException]]> </doc> </method> <doc> <![CDATA[<code>RunningJob</code> is the user-interface to query for details on a running Map-Reduce job. <p>Clients can get hold of <code>RunningJob</code> via the {@link JobClient} and then query the running-job for details such as name, configuration, progress etc.</p> @see JobClient @deprecated Use {@link org.apache.hadoop.mapreduce.Job} instead]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.RunningJob --> <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat --> <class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat} instead"> <constructor name="SequenceFileAsBinaryInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" 
type="java.io.IOException"/> </method> <doc> <![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw) format. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat --> <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader --> <class name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.RecordReader"/> <constructor name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="createKey" return="org.apache.hadoop.io.BytesWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="createValue" return="org.apache.hadoop.io.BytesWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getKeyClassName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Retrieve the name of the key class for this SequenceFile. 
@see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName]]> </doc> </method> <method name="getValueClassName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Retrieve the name of the value class for this SequenceFile. @see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName]]> </doc> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.BytesWritable"/> <param name="val" type="org.apache.hadoop.io.BytesWritable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read raw bytes from a SequenceFile.]]> </doc> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return the progress within the input split @return 0.0 to 1.0 of the input byte range]]> </doc> </method> <doc> <![CDATA[Read records from a SequenceFile as binary (raw) bytes.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader --> <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat --> <class name="SequenceFileAsBinaryOutputFormat" 
extends="org.apache.hadoop.mapred.SequenceFileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat} instead"> <constructor name="SequenceFileAsBinaryOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setSequenceFileOutputKeyClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the key class for the {@link SequenceFile} <p>This allows the user to specify the key class to be different from the actual class ({@link BytesWritable}) used for writing </p> @param conf the {@link JobConf} to modify @param theClass the SequenceFile output key class.]]> </doc> </method> <method name="setSequenceFileOutputValueClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the value class for the {@link SequenceFile} <p>This allows the user to specify the value class to be different from the actual class ({@link BytesWritable}) used for writing </p> @param conf the {@link JobConf} to modify @param theClass the SequenceFile output value class.]]> </doc> </method> <method name="getSequenceFileOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the key class for the {@link SequenceFile} @return the key class of the {@link SequenceFile}]]> </doc> </method> <method 
name="getSequenceFileOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the value class for the {@link SequenceFile} @return the value class of the {@link SequenceFile}]]> </doc> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An {@link OutputFormat} that writes keys, values to {@link SequenceFile}s in binary(raw) format @deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat --> <!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes --> <class name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" extends="org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat.WritableValueBytes" abstract="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <constructor 
name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <doc> <![CDATA[Inner class used for appendRaw]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes --> <!-- start class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat --> <class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat} instead"> <constructor name="SequenceFileAsTextInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[This class is similar to SequenceFileInputFormat, except it generates SequenceFileAsTextRecordReader which converts the input keys and values to their String forms by calling toString() method. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat --> <!-- start class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader --> <class name="SequenceFileAsTextRecordReader" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader} instead"> <implements name="org.apache.hadoop.mapred.RecordReader"/> <constructor name="SequenceFileAsTextRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="createKey" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="createValue" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="value" type="org.apache.hadoop.io.Text"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read key/value pair in a line.]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="true" 
static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[This class converts the input keys and values to their String forms by calling toString() method. This class to SequenceFileAsTextInputFormat class is as LineRecordReader class to TextInputFormat class. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader --> <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter --> <class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter} instead"> <constructor name="SequenceFileInputFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create a record reader for the given split @param split file split @param job job configuration @param reporter reporter who sends report to task tracker @return RecordReader]]> </doc> </method> <method name="setFilterClass" abstract="false" native="false" synchronized="false" 
static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="filterClass" type="java.lang.Class"/> <doc> <![CDATA[set the filter class @param conf application configuration @param filterClass filter class]]> </doc> </method> <doc> <![CDATA[A class that allows a map/red job to work on a sample of sequence files. The sample is decided by the filter class set by the job. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter --> <!-- start interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter --> <interface name="SequenceFileInputFilter.Filter" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter"/> <doc> <![CDATA[filter interface]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter --> <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase --> <class name="SequenceFileInputFilter.FilterBase" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.SequenceFileInputFilter.Filter"/> <constructor name="SequenceFileInputFilter.FilterBase" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[base class for Filters]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase --> <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter --> <class name="SequenceFileInputFilter.MD5Filter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase" abstract="false" static="true" 
final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFilter.MD5Filter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setFrequency" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="frequency" type="int"/> <doc> <![CDATA[set the filtering frequency in configuration @param conf configuration @param frequency filtering frequency]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[configure the filter according to configuration @param conf configuration]]> </doc> </method> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <doc> <![CDATA[Filtering method If MD5(key) % frequency==0, return true; otherwise return false @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]> </doc> </method> <field name="MD5_LEN" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class returns a set of records by examining the MD5 digest of its key against a filtering frequency <i>f</i>.
The filtering criterion is MD5(key) % f == 0.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter --> <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter --> <class name="SequenceFileInputFilter.PercentFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFilter.PercentFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setFrequency" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="frequency" type="int"/> <doc> <![CDATA[set the frequency and stores it in conf @param conf configuration @param frequency filtering frequency]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[configure the filter by checking the configuration @param conf configuration]]> </doc> </method> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <doc> <![CDATA[Filtering method If record# % frequency==0, return true; otherwise return false @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]> </doc> </method> <doc> <![CDATA[This class returns a percentage of records The percentage is determined by a filtering frequency <i>f</i> using the criterion record# % f == 0.
For example, if the frequency is 10, one out of 10 records is returned.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter --> <!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter --> <class name="SequenceFileInputFilter.RegexFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFilter.RegexFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setPattern" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="regex" type="java.lang.String"/> <exception name="PatternSyntaxException" type="java.util.regex.PatternSyntaxException"/> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[configure the Filter by checking the configuration]]> </doc> </method> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <doc> <![CDATA[Filtering method If key matches the regex, return true; otherwise return false @see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]> </doc> </method> <doc> <![CDATA[Records filter by matching key to regex]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter --> <!-- start class org.apache.hadoop.mapred.SequenceFileInputFormat --> <class name="SequenceFileInputFormat" 
extends="org.apache.hadoop.mapred.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat} instead."> <constructor name="SequenceFileInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An {@link InputFormat} for {@link SequenceFile}s. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileInputFormat --> <!-- start class org.apache.hadoop.mapred.SequenceFileOutputFormat --> <class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat} instead."> <constructor name="SequenceFileOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getReaders" return="org.apache.hadoop.io.SequenceFile.Reader[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="dir" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Open the output generated by this format.]]> </doc> </method> <method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}. 
@param conf the {@link JobConf} @return the {@link CompressionType} for the output {@link SequenceFile}, defaulting to {@link CompressionType#RECORD}]]> </doc> </method> <method name="setOutputCompressionType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/> <doc> <![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}. @param conf the {@link JobConf} to modify @param style the {@link CompressionType} for the output {@link SequenceFile}]]> </doc> </method> <doc> <![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileOutputFormat --> <!-- start class org.apache.hadoop.mapred.SequenceFileRecordReader --> <class name="SequenceFileRecordReader" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.RecordReader"/> <constructor name="SequenceFileRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="getKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The class of key that must be passed to {@link #next(Object, Object)}.]]> </doc> </method> <method name="getValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not
deprecated"> <doc> <![CDATA[The class of value that must be passed to {@link #next(Object, Object)}.]]> </doc> </method> <method name="createKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="createValue" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getCurrentValue" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return the progress within the input split @return 0.0 to 1.0 of the input byte range]]> </doc> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="seek" abstract="false" native="false" synchronized="true" static="false" final="false"
visibility="protected" deprecated="not deprecated"> <param name="pos" type="long"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <field name="conf" type="org.apache.hadoop.conf.Configuration" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SequenceFileRecordReader --> <!-- start class org.apache.hadoop.mapred.SkipBadRecords --> <class name="SkipBadRecords" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SkipBadRecords" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getAttemptsToStartSkipping" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the number of Task attempts AFTER which skip mode will be kicked off. When skip mode is kicked off, the task reports the range of records which it will process next to the TaskTracker. So that on failures, TT knows which ones are possibly the bad records. On further executions, those are skipped. Default value is 2.
@param conf the configuration @return attemptsToStartSkipping number of task attempts]]> </doc> </method> <method name="setAttemptsToStartSkipping" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="attemptsToStartSkipping" type="int"/> <doc> <![CDATA[Set the number of Task attempts AFTER which skip mode will be kicked off. When skip mode is kicked off, the task reports the range of records which it will process next to the TaskTracker. So that on failures, TT knows which ones are possibly the bad records. On further executions, those are skipped. Default value is 2. @param conf the configuration @param attemptsToStartSkipping number of task attempts]]> </doc> </method> <method name="getAutoIncrMapperProcCount" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the flag which if set to true, {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented by MapRunner after invoking the map function. This value must be set to false for applications which process the records asynchronously or buffer the input records. For example streaming. In such cases applications should increment this counter on their own. Default value is true. @param conf the configuration @return <code>true</code> if auto increment {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.
<code>false</code> otherwise.]]> </doc> </method> <method name="setAutoIncrMapperProcCount" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="autoIncr" type="boolean"/> <doc> <![CDATA[Set the flag which if set to true, {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented by MapRunner after invoking the map function. This value must be set to false for applications which process the records asynchronously or buffer the input records. For example streaming. In such cases applications should increment this counter on their own. Default value is true. @param conf the configuration @param autoIncr whether to auto increment {@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.]]> </doc> </method> <method name="getAutoIncrReducerProcCount" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the flag which if set to true, {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented by framework after invoking the reduce function. This value must be set to false for applications which process the records asynchronously or buffer the input records. For example streaming. In such cases applications should increment this counter on their own. Default value is true. @param conf the configuration @return <code>true</code> if auto increment {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}. 
<code>false</code> otherwise.]]> </doc> </method> <method name="setAutoIncrReducerProcCount" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="autoIncr" type="boolean"/> <doc> <![CDATA[Set the flag which if set to true, {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented by framework after invoking the reduce function. This value must be set to false for applications which process the records asynchronously or buffer the input records. For example streaming. In such cases applications should increment this counter on their own. Default value is true. @param conf the configuration @param autoIncr whether to auto increment {@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.]]> </doc> </method> <method name="getSkipOutputPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the directory to which skipped records are written. By default it is the sub directory of the output _logs directory. User can stop writing skipped records by setting the value null. @param conf the configuration. @return path skip output directory. Null is returned if this is not set and output directory is also not set.]]> </doc> </method> <method name="setSkipOutputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Set the directory to which skipped records are written. By default it is the sub directory of the output _logs directory. User can stop writing skipped records by setting the value null. 
@param conf the configuration. @param path skip output directory path]]> </doc> </method> <method name="getMapperMaxSkipRecords" return="long" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the number of acceptable skip records surrounding the bad record PER bad record in mapper. The number includes the bad record as well. To turn the feature of detection/skipping of bad records off, set the value to 0. The framework tries to narrow down the skipped range by retrying until this threshold is met OR all attempts get exhausted for this task. Set the value to Long.MAX_VALUE to indicate that framework need not try to narrow down. Whatever records(depends on application) get skipped are acceptable. Default value is 0. @param conf the configuration @return maxSkipRecs acceptable skip records.]]> </doc> </method> <method name="setMapperMaxSkipRecords" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="maxSkipRecs" type="long"/> <doc> <![CDATA[Set the number of acceptable skip records surrounding the bad record PER bad record in mapper. The number includes the bad record as well. To turn the feature of detection/skipping of bad records off, set the value to 0. The framework tries to narrow down the skipped range by retrying until this threshold is met OR all attempts get exhausted for this task. Set the value to Long.MAX_VALUE to indicate that framework need not try to narrow down. Whatever records(depends on application) get skipped are acceptable. Default value is 0. 
@param conf the configuration @param maxSkipRecs acceptable skip records.]]> </doc> </method> <method name="getReducerMaxSkipGroups" return="long" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the number of acceptable skip groups surrounding the bad group PER bad group in reducer. The number includes the bad group as well. To turn the feature of detection/skipping of bad groups off, set the value to 0. The framework tries to narrow down the skipped range by retrying until this threshold is met OR all attempts get exhausted for this task. Set the value to Long.MAX_VALUE to indicate that framework need not try to narrow down. Whatever groups(depends on application) get skipped are acceptable. Default value is 0. @param conf the configuration @return maxSkipGrps acceptable skip groups.]]> </doc> </method> <method name="setReducerMaxSkipGroups" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="maxSkipGrps" type="long"/> <doc> <![CDATA[Set the number of acceptable skip groups surrounding the bad group PER bad group in reducer. The number includes the bad group as well. To turn the feature of detection/skipping of bad groups off, set the value to 0. The framework tries to narrow down the skipped range by retrying until this threshold is met OR all attempts get exhausted for this task. Set the value to Long.MAX_VALUE to indicate that framework need not try to narrow down. Whatever groups(depends on application) get skipped are acceptable. Default value is 0. 
@param conf the configuration @param maxSkipGrps acceptable skip groups.]]> </doc> </method> <field name="COUNTER_GROUP" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Special counters which are written by the application and are used by the framework for detecting bad records. For detecting bad records these counters must be incremented by the application.]]> </doc> </field> <field name="COUNTER_MAP_PROCESSED_RECORDS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Number of processed map records. @see SkipBadRecords#getAutoIncrMapperProcCount(Configuration)]]> </doc> </field> <field name="COUNTER_REDUCE_PROCESSED_GROUPS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Number of processed reduce groups. @see SkipBadRecords#getAutoIncrReducerProcCount(Configuration)]]> </doc> </field> <doc> <![CDATA[Utility class for skip bad records functionality. It contains various settings related to skipping of bad records. <p>Hadoop provides an optional mode of execution in which the bad records are detected and skipped in further attempts. <p>This feature can be used when map/reduce tasks crash deterministically on certain input. This happens due to bugs in the map/reduce function. The usual course would be to fix these bugs. But sometimes this is not possible; perhaps the bug is in third party libraries for which the source code is not available. Due to this, the task never reaches completion even with multiple attempts and complete data for that task is lost.</p> <p>With this feature, only a small portion of data is lost surrounding the bad record, which may be acceptable for some user applications.
see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}</p> <p>The skipping mode gets kicked off after a certain number of failures see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}</p> <p>In the skipping mode, the map/reduce task maintains the record range which is getting processed at all times. Before giving the input to the map/reduce function, it sends this record range to the Task tracker. If the task crashes, the Task tracker knows which one was the last reported range. On further attempts that range gets skipped.</p>]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.SkipBadRecords --> <!-- start interface org.apache.hadoop.mapred.TaskAttemptContext --> <interface name="TaskAttemptContext" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext} instead."> <implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <method name="getTaskAttemptID" return="org.apache.hadoop.mapred.TaskAttemptID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProgressible" return="org.apache.hadoop.util.Progressable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext} instead.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.TaskAttemptContext --> <!-- start class org.apache.hadoop.mapred.TaskAttemptID --> <class name="TaskAttemptID" extends="org.apache.hadoop.mapreduce.TaskAttemptID" abstract="false" static="false" final="false" visibility="public"
deprecated="not deprecated"> <constructor name="TaskAttemptID" type="org.apache.hadoop.mapred.TaskID, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}. @param taskId TaskID that this task belongs to @param id the task attempt number]]> </doc> </constructor> <constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int" static="false" final="false" visibility="public" deprecated="Use {@link #TaskAttemptID(String, int, TaskType, int, int)}."> <doc> <![CDATA[Constructs a TaskId object from given parts. @param jtIdentifier jobTracker identifier @param jobId job number @param isMap whether the tip is a map @param taskId taskId number @param id the task attempt number @deprecated Use {@link #TaskAttemptID(String, int, TaskType, int, int)}.]]> </doc> </constructor> <constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskId object from given parts. 
@param jtIdentifier jobTracker identifier @param jobId job number @param type the TaskType @param taskId taskId number @param id the task attempt number]]> </doc> </constructor> <constructor name="TaskAttemptID" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="downgrade" return="org.apache.hadoop.mapred.TaskAttemptID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="old" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> <doc> <![CDATA[Downgrade a new TaskAttemptID to an old one @param old the new id @return either old or a new TaskAttemptID constructed to match old]]> </doc> </method> <method name="getTaskID" return="org.apache.hadoop.mapred.TaskID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobID" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="read" return="org.apache.hadoop.mapred.TaskAttemptID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="forName" return="org.apache.hadoop.mapred.TaskAttemptID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="str" type="java.lang.String"/> <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> <doc> <![CDATA[Construct a TaskAttemptID object from given string @return constructed TaskAttemptID object or null if the given String is null @throws IllegalArgumentException if the given string is 
malformed]]> </doc> </method> <method name="getTaskAttemptIDsPattern" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="jtIdentifier" type="java.lang.String"/> <param name="jobId" type="java.lang.Integer"/> <param name="isMap" type="java.lang.Boolean"/> <param name="taskId" type="java.lang.Integer"/> <param name="attemptId" type="java.lang.Integer"/> <doc> <![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can be given null, in which case that part of the regex will be generic. For example to obtain a regex matching <i>all task attempt IDs</i> of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first map task</i>, we would use : <pre> TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null); </pre> which will return : <pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre> @param jtIdentifier jobTracker identifier, or null @param jobId job number, or null @param isMap whether the tip is a map, or null @param taskId taskId number, or null @param attemptId the task attempt number, or null @return a regex pattern matching TaskAttemptIDs]]> </doc> </method> <method name="getTaskAttemptIDsPattern" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="jtIdentifier" type="java.lang.String"/> <param name="jobId" type="java.lang.Integer"/> <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> <param name="taskId" type="java.lang.Integer"/> <param name="attemptId" type="java.lang.Integer"/> <doc> <![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can be given null, in which case that part of the regex will be generic. 
For example to obtain a regex matching <i>all task attempt IDs</i> of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first map task</i>, we would use : <pre> TaskAttemptID.getTaskAttemptIDsPattern(null, null, TaskType.MAP, 1, null); </pre> which will return : <pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre> @param jtIdentifier jobTracker identifier, or null @param jobId job number, or null @param type the {@link TaskType} @param taskId taskId number, or null @param attemptId the task attempt number, or null @return a regex pattern matching TaskAttemptIDs]]> </doc> </method> <doc> <![CDATA[TaskAttemptID represents the immutable and unique identifier for a task attempt. Each task attempt is one particular instance of a Map or Reduce Task identified by its TaskID. TaskAttemptID consists of 2 parts. First part is the {@link TaskID}, that this TaskAttemptID belongs to. Second part is the task attempt number. <br> An example TaskAttemptID is : <code>attempt_200707121733_0003_m_000005_0</code> , which represents the zeroth task attempt for the fifth map task in the third job running at the jobtracker started at <code>200707121733</code>. <p> Applications should never construct or parse TaskAttemptID strings , but rather use appropriate constructors or {@link #forName(String)} method. 
@see JobID @see TaskID]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.TaskAttemptID --> <!-- start class org.apache.hadoop.mapred.TaskCompletionEvent --> <class name="TaskCompletionEvent" extends="org.apache.hadoop.mapreduce.TaskCompletionEvent" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.TaskCompletionEvent} instead"> <constructor name="TaskCompletionEvent" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default constructor for Writable.]]> </doc> </constructor> <constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapred.TaskAttemptID, int, boolean, org.apache.hadoop.mapred.TaskCompletionEvent.Status, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor. eventId should be created externally and incremented per event for each job. @param eventId event id, event id should be unique and assigned in incrementally, starting from 0. @param taskId task id @param status task's status @param taskTrackerHttp task tracker's host:port for http.]]> </doc> </constructor> <method name="getTaskId" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="use {@link #getTaskAttemptId()} instead."> <doc> <![CDATA[Returns task id. @return task id @deprecated use {@link #getTaskAttemptId()} instead.]]> </doc> </method> <method name="getTaskAttemptId" return="org.apache.hadoop.mapred.TaskAttemptID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns task id. 
@return task id]]> </doc> </method> <method name="getTaskStatus" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns enum Status.SUCCEEDED or Status.FAILED. @return task completion status]]> </doc> </method> <method name="setTaskId" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="use {@link #setTaskAttemptId(TaskAttemptID)} instead."> <param name="taskId" type="java.lang.String"/> <doc> <![CDATA[Sets task id. @param taskId @deprecated use {@link #setTaskAttemptId(TaskAttemptID)} instead.]]> </doc> </method> <method name="setTaskAttemptId" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/> <doc> <![CDATA[Sets task id. @param taskId]]> </doc> </method> <method name="setTaskStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="status" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"/> <doc> <![CDATA[Set task status. @param status]]> </doc> </method> <method name="setTaskRunTime" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="taskCompletionTime" type="int"/> <doc> <![CDATA[Set the task completion time @param taskCompletionTime time (in millisec) the task took to complete]]> </doc> </method> <method name="setEventId" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="eventId" type="int"/> <doc> <![CDATA[set event Id. should be assigned incrementally starting from 0. 
@param eventId]]> </doc> </method> <method name="setTaskTrackerHttp" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="taskTrackerHttp" type="java.lang.String"/> <doc> <![CDATA[Set task tracker http location. @param taskTrackerHttp]]> </doc> </method> <field name="EMPTY_ARRAY" type="org.apache.hadoop.mapred.TaskCompletionEvent[]" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This is used to track task completion events on job tracker. @deprecated Use {@link org.apache.hadoop.mapreduce.TaskCompletionEvent} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.TaskCompletionEvent --> <!-- start class org.apache.hadoop.mapred.TaskCompletionEvent.Status --> <class name="TaskCompletionEvent.Status" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapred.TaskCompletionEvent.Status --> <!-- start class org.apache.hadoop.mapred.TaskID --> <class name="TaskID" extends="org.apache.hadoop.mapreduce.TaskID" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int" static="false" final="false" visibility="public" deprecated="Use {@link #TaskID(String, int, TaskType, 
int)}"> <doc> <![CDATA[Constructs a TaskID object from given {@link JobID}. @param jobId JobID that this tip belongs to @param isMap whether the tip is a map @param id the tip number @deprecated Use {@link #TaskID(String, int, TaskType, int)}]]> </doc> </constructor> <constructor name="TaskID" type="java.lang.String, int, boolean, int" static="false" final="false" visibility="public" deprecated="Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType, int)}"> <doc> <![CDATA[Constructs a TaskInProgressId object from given parts. @param jtIdentifier jobTracker identifier @param jobId job number @param isMap whether the tip is a map @param id the tip number @deprecated Use {@link #TaskID(org.apache.hadoop.mapreduce.JobID, TaskType, int)}]]> </doc> </constructor> <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskID object from given {@link JobID}. @param jobId JobID that this tip belongs to @param type the {@link TaskType} @param id the tip number]]> </doc> </constructor> <constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskInProgressId object from given parts. 
@param jtIdentifier jobTracker identifier @param jobId job number @param type the {@link TaskType} @param id the tip number]]> </doc> </constructor> <constructor name="TaskID" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="downgrade" return="org.apache.hadoop.mapred.TaskID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="old" type="org.apache.hadoop.mapreduce.TaskID"/> <doc> <![CDATA[Downgrade a new TaskID to an old one @param old a new or old TaskID @return either old or a new TaskID build to match old]]> </doc> </method> <method name="read" return="org.apache.hadoop.mapred.TaskID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getJobID" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTaskIDsPattern" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType, Integer)}"> <param name="jtIdentifier" type="java.lang.String"/> <param name="jobId" type="java.lang.Integer"/> <param name="isMap" type="java.lang.Boolean"/> <param name="taskId" type="java.lang.Integer"/> <doc> <![CDATA[Returns a regex pattern which matches task IDs. Arguments can be given null, in which case that part of the regex will be generic. 
For example to obtain a regex matching <i>the first map task</i> of <i>any jobtracker</i>, of <i>any job</i>, we would use : <pre> TaskID.getTaskIDsPattern(null, null, true, 1); </pre> which will return : <pre> "task_[^_]*_[0-9]*_m_000001*" </pre> @param jtIdentifier jobTracker identifier, or null @param jobId job number, or null @param isMap whether the tip is a map, or null @param taskId taskId number, or null @return a regex pattern matching TaskIDs @deprecated Use {@link TaskID#getTaskIDsPattern(String, Integer, TaskType, Integer)}]]> </doc> </method> <method name="getTaskIDsPattern" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="jtIdentifier" type="java.lang.String"/> <param name="jobId" type="java.lang.Integer"/> <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> <param name="taskId" type="java.lang.Integer"/> <doc> <![CDATA[Returns a regex pattern which matches task IDs. Arguments can be given null, in which case that part of the regex will be generic. 
For example to obtain a regex matching <i>the first map task</i> of <i>any jobtracker</i>, of <i>any job</i>, we would use : <pre> TaskID.getTaskIDsPattern(null, null, TaskType.MAP, 1); </pre> which will return : <pre> "task_[^_]*_[0-9]*_m_000001*" </pre> @param jtIdentifier jobTracker identifier, or null @param jobId job number, or null @param type the {@link TaskType}, or null @param taskId taskId number, or null @return a regex pattern matching TaskIDs]]> </doc> </method> <method name="forName" return="org.apache.hadoop.mapred.TaskID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="str" type="java.lang.String"/> <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> </method> <doc> <![CDATA[TaskID represents the immutable and unique identifier for a Map or Reduce Task. Each TaskID encompasses multiple attempts made to execute the Map or Reduce Task, each of which is uniquely identified by its TaskAttemptID. TaskID consists of 3 parts. First part is the {@link JobID}, that this TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r' representing whether the task is a map task or a reduce task. And the third part is the task number. <br> An example TaskID is : <code>task_200707121733_0003_m_000005</code> , which represents the fifth map task in the third job running at the jobtracker started at <code>200707121733</code>. <p> Applications should never construct or parse TaskID strings , but rather use appropriate constructors or {@link #forName(String)} method. 
@see JobID @see TaskAttemptID]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.TaskID --> <!-- start class org.apache.hadoop.mapred.TaskLog.Reader --> <class name="TaskLog.Reader" extends="java.io.InputStream" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskLog.Reader" type="org.apache.hadoop.mapred.TaskAttemptID, org.apache.hadoop.mapred.TaskLog.LogName, long, long, boolean" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read a log file from start to end positions. The offsets may be negative, in which case they are relative to the end of the file. For example, Reader(taskid, kind, 0, -1) is the entire file and Reader(taskid, kind, -4197, -1) is the last 4196 bytes. @param taskid the id of the task to read the log file for @param kind the kind of log to read @param start the offset to read from (negative is relative to tail) @param end the offset to read upto (negative is relative to tail) @param isCleanup whether the attempt is cleanup attempt or not @throws IOException]]> </doc> </constructor> <method name="read" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="read" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="buffer" type="byte[]"/> <param name="offset" type="int"/> <param name="length" type="int"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="available" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> 
</method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> </class> <!-- end class org.apache.hadoop.mapred.TaskLog.Reader --> <!-- start class org.apache.hadoop.mapred.TaskLogAppender --> <class name="TaskLogAppender" extends="org.apache.log4j.FileAppender" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskLogAppender" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="activateOptions" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="append" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="event" type="org.apache.log4j.spi.LoggingEvent"/> </method> <method name="flush" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTaskId" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Getter/Setter methods for log4j.]]> </doc> </method> <method name="setTaskId" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskId" type="java.lang.String"/> </method> <method name="getTotalLogFileSize" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" 
deprecated="not deprecated"> </method> <method name="setTotalLogFileSize" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="logSize" type="long"/> </method> <method name="setIsCleanup" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="isCleanup" type="boolean"/> <doc> <![CDATA[Set whether the task is a cleanup attempt or not. @param isCleanup true if the task is cleanup attempt, false otherwise.]]> </doc> </method> <method name="getIsCleanup" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get whether task is cleanup attempt or not. @return true if the task is cleanup attempt, false otherwise.]]> </doc> </method> <doc> <![CDATA[A simple log4j-appender for the task child's map-reduce system logs.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.TaskLogAppender --> <!-- start class org.apache.hadoop.mapred.TaskReport --> <class name="TaskReport" extends="org.apache.hadoop.mapreduce.TaskReport" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.TaskReport} instead"> <constructor name="TaskReport" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getTaskID" return="org.apache.hadoop.mapred.TaskID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The id of the task.]]> </doc> </method> <method name="getCounters" return="org.apache.hadoop.mapred.Counters" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setSuccessfulAttempt" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="t" type="org.apache.hadoop.mapred.TaskAttemptID"/> <doc> <![CDATA[set successful attempt ID of the task.]]> </doc> </method> <method name="getSuccessfulTaskAttempt" return="org.apache.hadoop.mapred.TaskAttemptID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the attempt ID that took this task to completion]]> </doc> </method> <method name="setRunningTaskAttempts" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="runningAttempts" type="java.util.Collection"/> <doc> <![CDATA[set running attempt(s) of the task.]]> </doc> </method> <method name="getRunningTaskAttempts" return="java.util.Collection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the running task attempt IDs for this task]]> </doc> </method> <method name="setFinishTime" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="finishTime" type="long"/> <doc> <![CDATA[set finish time of task. @param finishTime finish time of task.]]> </doc> </method> <method name="setStartTime" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="startTime" type="long"/> <doc> <![CDATA[set start time of the task.]]> </doc> </method> <doc> <![CDATA[A report on the state of a task. 
@deprecated Use {@link org.apache.hadoop.mapreduce.TaskReport} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.TaskReport --> <!-- start class org.apache.hadoop.mapred.TextInputFormat --> <class name="TextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat} instead."> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <constructor name="TextInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="isSplitable" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="file" type="org.apache.hadoop.fs.Path"/> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. Either linefeed or carriage-return is used to signal end of line. Keys are the position in the file, and values are the line of text. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.TextInputFormat --> <!-- start class org.apache.hadoop.mapred.TextOutputFormat --> <class name="TextOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead."> <constructor name="TextOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An {@link OutputFormat} that writes plain text files. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.TextOutputFormat --> <!-- start class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter --> <class name="TextOutputFormat.LineRecordWriter" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.RecordWriter"/> <constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="out" type="java.io.DataOutputStream" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> </class> <!-- end class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter --> <!-- start class org.apache.hadoop.mapred.Utils --> <class name="Utils" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Utils" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> 
<doc> <![CDATA[A utility class. It provides a path filter utility to filter out output/part files in the output dir.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.Utils --> <!-- start class org.apache.hadoop.mapred.Utils.OutputFileUtils --> <class name="Utils.OutputFileUtils" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Utils.OutputFileUtils" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> </class> <!-- end class org.apache.hadoop.mapred.Utils.OutputFileUtils --> <!-- start class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter --> <class name="Utils.OutputFileUtils.OutputFilesFilter" extends="org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Utils.OutputFileUtils.OutputFilesFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> </method> <doc> <![CDATA[This class filters output (part) files from the given directory. It does not accept files with filenames _logs and _SUCCESS. 
This can be used to list paths of output directory as follows: Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, new OutputFilesFilter()));]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputFilesFilter --> <!-- start class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter --> <class name="Utils.OutputFileUtils.OutputLogFilter" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.fs.PathFilter"/> <constructor name="Utils.OutputFileUtils.OutputLogFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> </method> <doc> <![CDATA[This class filters log files from the given directory. It doesn't accept paths having _logs. This can be used to list paths of output directory as follows: Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, new OutputLogFilter()));]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter --> </package> <package name="org.apache.hadoop.mapred.jobcontrol"> <!-- start class org.apache.hadoop.mapred.jobcontrol.Job --> <class name="Job" extends="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link ControlledJob} instead."> <constructor name="Job" type="org.apache.hadoop.mapred.JobConf, java.util.ArrayList" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Construct a job. @param jobConf a mapred job configuration representing a job to be executed. 
@param dependingJobs an array of jobs the current job depends on]]> </doc> </constructor> <constructor name="Job" type="org.apache.hadoop.mapred.JobConf" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="getAssignedJobID" return="org.apache.hadoop.mapred.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]> </doc> </method> <method name="setAssignedJobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="setAssignedJobID should not be called. JOBID is set by the framework."> <param name="mapredJobID" type="org.apache.hadoop.mapred.JobID"/> <doc> <![CDATA[@deprecated setAssignedJobID should not be called. JOBID is set by the framework.]]> </doc> </method> <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the mapred job conf of this job]]> </doc> </method> <method name="setJobConf" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Set the mapred job conf for this job. 
@param jobConf the mapred job conf for this job.]]> </doc> </method> <method name="getState" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the state of this job]]> </doc> </method> <method name="getJobClient" return="org.apache.hadoop.mapred.JobClient" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the job client of this job]]> </doc> </method> <method name="getDependingJobs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the depending jobs of this job]]> </doc> </method> <field name="SUCCESS" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="WAITING" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="RUNNING" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="READY" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="FAILED" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="DEPENDENT_FAILED" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[@deprecated Use {@link ControlledJob} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.jobcontrol.Job --> <!-- start class org.apache.hadoop.mapred.jobcontrol.JobControl --> <class name="JobControl" 
extends="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl} instead"> <constructor name="JobControl" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a job control for a group of jobs. @param groupName a name identifying this group]]> </doc> </constructor> <method name="getWaitingJobs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the waiting state]]> </doc> </method> <method name="getRunningJobs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the running state]]> </doc> </method> <method name="getReadyJobs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the ready state]]> </doc> </method> <method name="getSuccessfulJobs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the success state]]> </doc> </method> <method name="getFailedJobs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="addJobs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobs" type="java.util.Collection"/> <doc> <![CDATA[Add a collection of jobs @param jobs]]> </doc> 
</method> <method name="getState" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the thread state]]> </doc> </method> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.jobcontrol.JobControl --> </package> <package name="org.apache.hadoop.mapred.join"> <!-- start class org.apache.hadoop.mapred.join.ArrayListBackedIterator --> <class name="ArrayListBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator} instead"> <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> <constructor name="ArrayListBackedIterator" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="ArrayListBackedIterator" type="java.util.ArrayList" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class provides an implementation of ResetableIterator. The implementation uses an {@link java.util.ArrayList} to store elements added to it, replaying them as requested. Prefer {@link StreamBackedIterator}. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.ArrayListBackedIterator --> <!-- start interface org.apache.hadoop.mapred.join.ComposableInputFormat --> <interface name="ComposableInputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat} instead"> <implements name="org.apache.hadoop.mapred.InputFormat"/> <method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Refinement of InputFormat requiring implementors to provide ComposableRecordReader instead of RecordReader. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat} instead]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.join.ComposableInputFormat --> <!-- start interface org.apache.hadoop.mapred.join.ComposableRecordReader --> <interface name="ComposableRecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader} instead"> <implements name="org.apache.hadoop.mapred.RecordReader"/> <implements name="java.lang.Comparable"/> <method name="id" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the position in the collector this class occupies.]]> </doc> </method> <method name="key" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the key this RecordReader would supply on a call to next(K,V)]]> </doc> </method> <method name="key" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Clone the key at the head of this RecordReader into the object provided.]]> </doc> </method> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns true if the stream is not empty, but provides no guarantee that a call to next(K,V) will succeed.]]> </doc> </method> <method name="skip" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> 
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> </doc> </method> <method name="accept" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[While key-value pairs from this RecordReader match the given key, register them with the JoinCollector provided.]]> </doc> </method> <doc> <![CDATA[Additional operations required of a RecordReader to participate in a join. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader} instead]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.join.ComposableRecordReader --> <!-- start class org.apache.hadoop.mapred.join.CompositeInputFormat --> <class name="CompositeInputFormat" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat} instead"> <implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/> <constructor name="CompositeInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setFormat" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Interpret a given string as a composite expression. 
{@code func ::= <ident>([<func>,]*<func>) func ::= tbl(<class>,"<path>") class ::= @see java.lang.Class#forName(java.lang.String) path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) } Reads expression from the <tt>mapred.join.expr</tt> property and user-supplied join types from <tt>mapred.join.define.<ident></tt> types. Paths supplied to <tt>tbl</tt> are given as input paths to the InputFormat class listed. @see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> </doc> </method> <method name="addDefaults" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Adds the default set of identifiers to the parser.]]> </doc> </method> <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="numSplits" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the ith split from each child to the ith composite split.]]> </doc> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Construct a CompositeRecordReader for the children of this InputFormat as defined in the init expression. The outermost join need only be composable, not necessarily a composite. 
Mandating TupleWritable isn't strictly correct.]]> </doc> </method> <method name="compose" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="inf" type="java.lang.Class"/> <param name="path" type="java.lang.String"/> <doc> <![CDATA[Convenience method for constructing composite formats. Given InputFormat class (inf), path (p) return: {@code tbl(<inf>, <p>) }]]> </doc> </method> <method name="compose" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="op" type="java.lang.String"/> <param name="inf" type="java.lang.Class"/> <param name="path" type="java.lang.String[]"/> <doc> <![CDATA[Convenience method for constructing composite formats. Given operation (op), Object class (inf), set of paths (p) return: {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> </doc> </method> <method name="compose" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="op" type="java.lang.String"/> <param name="inf" type="java.lang.Class"/> <param name="path" type="org.apache.hadoop.fs.Path[]"/> <doc> <![CDATA[Convenience method for constructing composite formats. Given operation (op), Object class (inf), set of paths (p) return: {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> </doc> </method> <doc> <![CDATA[An InputFormat capable of performing joins over a set of data sources sorted and partitioned the same way. @see #setFormat A user may define new join types by setting the property <tt>mapred.join.define.<ident></tt> to a classname. In the expression <tt>mapred.join.expr</tt>, the identifier will be assumed to be a ComposableRecordReader. 
<tt>mapred.join.keycomparator</tt> can be a classname used to compare keys in the join. @see JoinRecordReader @see MultiFilterRecordReader @deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.CompositeInputFormat --> <!-- start class org.apache.hadoop.mapred.join.CompositeInputSplit --> <class name="CompositeInputSplit" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit} instead"> <implements name="org.apache.hadoop.mapred.InputSplit"/> <constructor name="CompositeInputSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CompositeInputSplit" type="int" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="s" type="org.apache.hadoop.mapred.InputSplit"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add an InputSplit to this collection. 
@throws IOException If capacity was not specified during construction or if capacity has been reached.]]> </doc> </method> <method name="get" return="org.apache.hadoop.mapred.InputSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <doc> <![CDATA[Get ith child InputSplit.]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return the aggregate length of all child InputSplits currently added.]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the length of ith child InputSplit.]]> </doc> </method> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Collect a set of hosts from all child InputSplits.]]> </doc> </method> <method name="getLocation" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[getLocations from ith InputSplit.]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" 
type="java.io.IOException"/> <doc> <![CDATA[Write splits in the following format. {@code <count><class1><class2>...<classn><split1><split2>...<splitn> }]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc} @throws IOException If the child InputSplit cannot be read, typically for faliing access checks.]]> </doc> </method> <doc> <![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted into this collection must have a public default constructor. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.CompositeInputSplit --> <!-- start class org.apache.hadoop.mapred.join.CompositeRecordReader --> <class name="CompositeRecordReader" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader} instead"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="CompositeRecordReader" type="int, int, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create a RecordReader with <tt>capacity</tt> children to position <tt>id</tt> in the parent reader. 
The id of a root CompositeRecordReader is -1 by convention, but relying on this is not recommended.]]> </doc> </constructor> <method name="combine" return="boolean" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/> </method> <method name="id" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the position in the collector this class occupies.]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getRecordReaderQueue" return="java.util.PriorityQueue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Return sorted list of RecordReaders for this composite.]]> </doc> </method> <method name="getComparator" return="org.apache.hadoop.io.WritableComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Return comparator defining the ordering for RecordReaders in this composite.]]> </doc> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="rr" 
type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add a RecordReader to this collection. The id() of a RecordReader determines where in the Tuple its entry will appear. Adding RecordReaders with the same id has undefined behavior.]]> </doc> </method> <method name="key" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the key for the current join or the value at the top of the RecordReader heap.]]> </doc> </method> <method name="key" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Clone the key at the top of this RR into the given object.]]> </doc> </method> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return true if it is possible that this could emit more values.]]> </doc> </method> <method name="skip" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Pass skip key to child RRs.]]> </doc> </method> <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Obtain an iterator over the child RRs apropos of the value type ultimately emitted from this join.]]> </doc> </method> <method name="accept" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" 
deprecated="not deprecated"> <param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[If key provided matches that of this Composite, give JoinCollector iterator over values it may emit.]]> </doc> </method> <method name="fillJoinCollector" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="iterkey" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For all child RRs offering the key provided, obtain an iterator at that position in the JoinCollector.]]> </doc> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> <doc> <![CDATA[Implement Comparable contract (compare key of join or head of heap with that of another).]]> </doc> </method> <method name="createKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a new key value common to all child RRs. 
@throws ClassCastException if key classes differ.]]> </doc> </method> <method name="createInternalValue" return="org.apache.hadoop.mapred.join.TupleWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Create a value to be used internally for joins.]]> </doc> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Unsupported (returns zero in all cases).]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close all child RRs.]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Report progress as the minimum of all child RR progress.]]> </doc> </method> <field name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector" transient="false" volatile="false" static="false" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="kids" type="org.apache.hadoop.mapred.join.ComposableRecordReader[]" transient="false" volatile="false" static="false" final="true" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key type and partitioning. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.CompositeRecordReader --> <!-- start class org.apache.hadoop.mapred.join.InnerJoinRecordReader --> <class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader} instead."> <method name="combine" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> <doc> <![CDATA[Return true iff the tuple is full (all data sources contain this key).]]> </doc> </method> <doc> <![CDATA[Full inner join. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.InnerJoinRecordReader --> <!-- start class org.apache.hadoop.mapred.join.JoinRecordReader --> <class name="JoinRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.JoinRecordReader} instead"> <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> <constructor name="JoinRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" 
type="org.apache.hadoop.mapred.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Emit the next set of key, value pairs as defined by the child RecordReaders and operation associated with this composite RR.]]> </doc> </method> <method name="createValue" return="org.apache.hadoop.mapred.join.TupleWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Return an iterator wrapping the JoinCollector.]]> </doc> </method> <doc> <![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.JoinRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.JoinRecordReader --> <!-- start class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator --> <class name="JoinRecordReader.JoinDelegationIterator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> <constructor name="JoinRecordReader.JoinDelegationIterator" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" 
type="org.apache.hadoop.mapred.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="item" type="org.apache.hadoop.mapred.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Since the JoinCollector is effecting our operation, we need only provide an iterator proxy wrapping its operation.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator --> <!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader --> <class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader} instead"> <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> <constructor name="MultiFilterRecordReader" type="int, 
org.apache.hadoop.mapred.JobConf, int, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="emit" return="V" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For each tuple emitted, return a value (typically one of the values in the tuple). Modifying the Writables in the tuple is permitted and unlikely to affect join behavior in most cases, but it is not recommended. It's safer to clone first.]]> </doc> </method> <method name="combine" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> <doc> <![CDATA[Default implementation offers {@link #emit} every Tuple from the collector (the outer join of child RRs).]]> </doc> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="createValue" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> 
<![CDATA[Return an iterator returning a single value from the tuple. @see MultiFilterDelegationIterator]]> </doc> </method> <doc> <![CDATA[Base class for Composite join returning values derived from multiple sources, but generally not tuples. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader --> <!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator --> <class name="MultiFilterRecordReader.MultiFilterDelegationIterator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> <constructor name="MultiFilterRecordReader.MultiFilterDelegationIterator" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param 
name="item" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Proxy the JoinCollector, but include callback to emit.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator --> <!-- start class org.apache.hadoop.mapred.join.OuterJoinRecordReader --> <class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader} instead"> <method name="combine" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> <doc> <![CDATA[Emit everything from the collector.]]> </doc> </method> <doc> <![CDATA[Full outer join. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.OuterJoinRecordReader --> <!-- start class org.apache.hadoop.mapred.join.OverrideRecordReader --> <class name="OverrideRecordReader" extends="org.apache.hadoop.mapred.join.MultiFilterRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader} instead"> <method name="emit" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/> <doc> <![CDATA[Emit the value with the highest position in the tuple.]]> </doc> </method> <method name="fillJoinCollector" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="iterkey" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Instead of filling the JoinCollector with iterators from all data sources, fill only the rightmost for this key. This not only saves space by discarding the other sources, but it also emits the number of key-value pairs in the preferred RecordReader instead of repeating that stream n times, where n is the cardinality of the cross product of the discarded streams for the given key.]]> </doc> </method> <doc> <![CDATA[Prefer the "rightmost" data source for this key. For example, <tt>override(S1,S2,S3)</tt> will prefer values from S3 over S2, and values from S2 over S1 for all keys emitted from all sources. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.OverrideRecordReader --> <!-- start class org.apache.hadoop.mapred.join.Parser --> <class name="Parser" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.Parser} instead"> <constructor name="Parser" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[Very simple shift-reduce parser for join expressions. This should be sufficient for the user extension permitted now, but ought to be replaced with a parser generator if more complex grammars are supported. In particular, this "shift-reduce" parser has no states. Each set of formals requires a different internal node type, which is responsible for interpreting the list of tokens it receives. This is sufficient for the current grammar, but it has several annoying properties that might inhibit extension. In particular, parentheses are always function calls; an algebraic or filter grammar would not only require a node type, but must also work around the internals of this parser. For most other cases, adding classes to the hierarchy- particularly by extending JoinRecordReader and MultiFilterRecordReader- is fairly straightforward. One need only override the relevant method(s) (usually only {@link CompositeRecordReader#combine}) and include a property to map its value to an identifier in the parser. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.Parser} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.Parser --> <!-- start class org.apache.hadoop.mapred.join.Parser.Node --> <class name="Parser.Node" extends="java.lang.Object" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/> <constructor name="Parser.Node" type="java.lang.String" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="addIdentifier" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <param name="ident" type="java.lang.String"/> <param name="mcstrSig" type="java.lang.Class[]"/> <param name="nodetype" type="java.lang.Class"/> <param name="cl" type="java.lang.Class"/> <exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/> <doc> <![CDATA[For a given identifier, add a mapping to the nodetype for the parse tree and to the ComposableRecordReader to be created, including the formals required to invoke the constructor. 
The nodetype and constructor signature should be filled in from the child node.]]> </doc> </method> <method name="setID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="id" type="int"/> </method> <method name="setKeyComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="cmpcl" type="java.lang.Class"/> </method> <field name="rrCstrMap" type="java.util.Map" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="id" type="int" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="ident" type="java.lang.String" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="cmpcl" type="java.lang.Class" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> </class> <!-- end class org.apache.hadoop.mapred.join.Parser.Node --> <!-- start class org.apache.hadoop.mapred.join.Parser.NodeToken --> <class name="Parser.NodeToken" extends="org.apache.hadoop.mapred.join.Parser.Token" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.mapred.join.Parser.NodeToken --> <!-- start class org.apache.hadoop.mapred.join.Parser.NumToken --> <class name="Parser.NumToken" extends="org.apache.hadoop.mapred.join.Parser.Token" abstract="false" static="true" final="false" visibility="public" 
deprecated="not deprecated"> <constructor name="Parser.NumToken" type="double" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getNum" return="double" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.mapred.join.Parser.NumToken --> <!-- start class org.apache.hadoop.mapred.join.Parser.StrToken --> <class name="Parser.StrToken" extends="org.apache.hadoop.mapred.join.Parser.Token" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Parser.StrToken" type="org.apache.hadoop.mapred.join.Parser.TType, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getStr" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.mapred.join.Parser.StrToken --> <!-- start class org.apache.hadoop.mapred.join.Parser.Token --> <class name="Parser.Token" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <method name="getType" return="org.apache.hadoop.mapred.join.Parser.TType" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getNum" return="double" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> 
<exception name="IOException" type="java.io.IOException"/> </method> <method name="getStr" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Tagged-union type for tokens from the join expression. @see Parser.TType]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.Parser.Token --> <!-- start class org.apache.hadoop.mapred.join.Parser.TType --> <class name="Parser.TType" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapred.join.Parser.TType[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapred.join.Parser.TType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapred.join.Parser.TType --> <!-- start interface org.apache.hadoop.mapred.join.ResetableIterator --> <interface name="ResetableIterator" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.ResetableIterator} instead"> <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> <doc> <![CDATA[This defines an interface to a stateful Iterator that can replay elements added to it directly. Note that this does not extend {@link java.util.Iterator}. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.ResetableIterator} instead]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.join.ResetableIterator --> <!-- start class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY --> <class name="ResetableIterator.EMPTY" extends="org.apache.hadoop.mapreduce.lib.join.ResetableIterator.EMPTY" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> <constructor name="ResetableIterator.EMPTY" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> </class> <!-- end class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY --> <!-- start class org.apache.hadoop.mapred.join.StreamBackedIterator --> <class name="StreamBackedIterator" extends="org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator} instead"> <implements name="org.apache.hadoop.mapred.join.ResetableIterator"/> <constructor name="StreamBackedIterator" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class provides an implementation of ResetableIterator. This implementation uses a byte array to store elements added to it. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.StreamBackedIterator --> <!-- start class org.apache.hadoop.mapred.join.TupleWritable --> <class name="TupleWritable" extends="org.apache.hadoop.mapreduce.lib.join.TupleWritable" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.TupleWritable} instead"> <constructor name="TupleWritable" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create an empty tuple with no allocated storage for writables.]]> </doc> </constructor> <constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Initialize tuple with storage; unknown whether any of them contain "written" values.]]> </doc> </constructor> <doc> <![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s. This is *not* a general-purpose tuple type. In almost all cases, users are encouraged to implement their own serializable types, which can perform better validation and provide more efficient encodings than this class is capable of. TupleWritable relies on the join framework for type safety and assumes its instances will rarely be persisted, assumptions not only incompatible with, but contrary to the general case. 
@see org.apache.hadoop.io.Writable @deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.TupleWritable} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.TupleWritable --> <!-- start class org.apache.hadoop.mapred.join.WrappedRecordReader --> <class name="WrappedRecordReader" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader} instead"> <implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/> <method name="id" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="key" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the key at the head of this RR.]]> </doc> </method> <method name="key" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="qkey" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Clone the key at the head of this RR into the object supplied.]]> </doc> </method> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return true if the RR- including the k,v pair stored in this object- is exhausted.]]> </doc> </method> <method name="skip" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> </doc> </method> <method name="next" 
return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read the next k,v pair into the head of this object; return true iff the RR and this are exhausted.]]> </doc> </method> <method name="accept" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add an iterator to the collector at the position occupied by this RecordReader over the values in this stream paired with the key provided (ie register a stream of values from this source matching K with a collector).]]> </doc> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="U"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Write key-value pair at the head of this stream to the objects provided; get next key-value pair from proxied RR.]]> </doc> </method> <method name="createKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Request new key from proxied RR.]]> </doc> </method> <method name="createValue" return="U" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Request new value from proxied RR.]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not 
deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Request progress from proxied RR.]]> </doc> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Request position from proxied RR.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Forward close request to proxied RR.]]> </doc> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/> <doc> <![CDATA[Implement Comparable contract (compare key at head of proxied RR with that of another).]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="java.lang.Object"/> <doc> <![CDATA[Return true iff compareTo(other) returns true.]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Proxy class for a RecordReader participating in the join framework. This class keeps track of the "head" key-value pair for the provided RecordReader and keeps a store of values matching a key when this source is participating in a join. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.join.WrappedRecordReader --> </package> <package name="org.apache.hadoop.mapred.lib"> <!-- start class org.apache.hadoop.mapred.lib.BinaryPartitioner --> <class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner} instead."> <implements name="org.apache.hadoop.mapred.Partitioner"/> <constructor name="BinaryPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <doc> <![CDATA[Partition {@link BinaryComparable} keys using a configurable part of the bytes array returned by {@link BinaryComparable#getBytes()}. 
@see org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner @deprecated Use {@link org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.BinaryPartitioner --> <!-- start class org.apache.hadoop.mapred.lib.ChainMapper --> <class name="ChainMapper" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.chain.ChainMapper} instead"> <implements name="org.apache.hadoop.mapred.Mapper"/> <constructor name="ChainMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor.]]> </doc> </constructor> <method name="addMapper" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="klass" type="java.lang.Class"/> <param name="inputKeyClass" type="java.lang.Class"/> <param name="inputValueClass" type="java.lang.Class"/> <param name="outputKeyClass" type="java.lang.Class"/> <param name="outputValueClass" type="java.lang.Class"/> <param name="byValue" type="boolean"/> <param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Adds a Mapper class to the chain job's JobConf. <p/> It has to be specified how key and values are passed from one element of the chain to the next, by value or by reference. If a Mapper leverages the assumed semantics that the key and values are not modified by the collector 'by value' must be used. If the Mapper does not expect this semantics, as an optimization to avoid serialization and deserialization 'by reference' can be used. <p/> For the added Mapper the configuration given for it, <code>mapperConf</code>, have precedence over the job's JobConf. This precedence is in effect when the task is running. 
<p/> IMPORTANT: There is no need to specify the output key/value classes for the ChainMapper, this is done by the addMapper for the last mapper in the chain <p/> @param job job's JobConf to add the Mapper class. @param klass the Mapper class to add. @param inputKeyClass mapper input key class. @param inputValueClass mapper input value class. @param outputKeyClass mapper output key class. @param outputValueClass mapper output value class. @param byValue indicates if key/values should be passed by value to the next Mapper in the chain, if any. @param mapperConf a JobConf with the configuration for the Mapper class. It is recommended to use a JobConf without default values using the <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Configures the ChainMapper and all the Mappers in the chain. 
<p/> If this method is overridden <code>super.configure(...)</code> should be invoked at the beginning of the overriding method.]]> </doc> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <param name="value" type="java.lang.Object"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Chains the <code>map(...)</code> methods of the Mappers in the chain.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Closes the ChainMapper and all the Mappers in the chain. <p/> If this method is overridden <code>super.close()</code> should be invoked at the end of the overriding method.]]> </doc> </method> <doc> <![CDATA[The ChainMapper class allows to use multiple Mapper classes within a single Map task. <p/> The Mapper classes are invoked in a chained (or piped) fashion, the output of the first becomes the input of the second, and so on until the last Mapper, the output of the last Mapper will be written to the task's output. <p/> The key functionality of this feature is that the Mappers in the chain do not need to be aware that they are executed in a chain. This enables having reusable specialized Mappers that can be combined to perform composite operations within a single task. <p/> Special care has to be taken when creating chains that the key/values output by a Mapper are valid for the following Mapper in the chain. It is assumed all Mappers and the Reduce in the chain use matching output and input key and value classes as no conversion is done by the chaining code. 
<p/> Using the ChainMapper and the ChainReducer classes it is possible to compose Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An immediate benefit of this pattern is a dramatic reduction in disk IO. <p/> IMPORTANT: There is no need to specify the output key/value classes for the ChainMapper, this is done by the addMapper for the last mapper in the chain. <p/> ChainMapper usage pattern: <p/> <pre> ... conf.setJobName("chain"); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); <p/> JobConf mapAConf = new JobConf(false); ... ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, Text.class, Text.class, true, mapAConf); <p/> JobConf mapBConf = new JobConf(false); ... ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, LongWritable.class, Text.class, false, mapBConf); <p/> JobConf reduceConf = new JobConf(false); ... ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, Text.class, Text.class, true, reduceConf); <p/> ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, LongWritable.class, Text.class, false, null); <p/> ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, LongWritable.class, LongWritable.class, true, null); <p/> FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); ... <p/> JobClient jc = new JobClient(conf); RunningJob job = jc.submitJob(conf); ... 
</pre> @deprecated Use {@link org.apache.hadoop.mapreduce.lib.chain.ChainMapper} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.ChainMapper --> <!-- start class org.apache.hadoop.mapred.lib.ChainReducer --> <class name="ChainReducer" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.chain.ChainReducer} instead"> <implements name="org.apache.hadoop.mapred.Reducer"/> <constructor name="ChainReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor.]]> </doc> </constructor> <method name="setReducer" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="klass" type="java.lang.Class"/> <param name="inputKeyClass" type="java.lang.Class"/> <param name="inputValueClass" type="java.lang.Class"/> <param name="outputKeyClass" type="java.lang.Class"/> <param name="outputValueClass" type="java.lang.Class"/> <param name="byValue" type="boolean"/> <param name="reducerConf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Sets the Reducer class to the chain job's JobConf. <p/> It has to be specified how key and values are passed from one element of the chain to the next, by value or by reference. If a Reducer leverages the assumed semantics that the key and values are not modified by the collector 'by value' must be used. If the Reducer does not expect this semantics, as an optimization to avoid serialization and deserialization 'by reference' can be used. <p/> For the added Reducer the configuration given for it, <code>reducerConf</code>, have precedence over the job's JobConf. This precedence is in effect when the task is running. 
<p/> IMPORTANT: There is no need to specify the output key/value classes for the ChainReducer, this is done by the setReducer or the addMapper for the last element in the chain. @param job job's JobConf to add the Reducer class. @param klass the Reducer class to add. @param inputKeyClass reducer input key class. @param inputValueClass reducer input value class. @param outputKeyClass reducer output key class. @param outputValueClass reducer output value class. @param byValue indicates if key/values should be passed by value to the next Mapper in the chain, if any. @param reducerConf a JobConf with the configuration for the Reducer class. It is recommended to use a JobConf without default values using the <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> </doc> </method> <method name="addMapper" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="klass" type="java.lang.Class"/> <param name="inputKeyClass" type="java.lang.Class"/> <param name="inputValueClass" type="java.lang.Class"/> <param name="outputKeyClass" type="java.lang.Class"/> <param name="outputValueClass" type="java.lang.Class"/> <param name="byValue" type="boolean"/> <param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Adds a Mapper class to the chain job's JobConf. <p/> It has to be specified how key and values are passed from one element of the chain to the next, by value or by reference. If a Mapper leverages the assumed semantics that the key and values are not modified by the collector 'by value' must be used. If the Mapper does not expect this semantics, as an optimization to avoid serialization and deserialization 'by reference' can be used. <p/> For the added Mapper the configuration given for it, <code>mapperConf</code>, have precedence over the job's JobConf. 
This precedence is in effect when the task is running. <p/> IMPORTANT: There is no need to specify the output key/value classes for the ChainMapper, this is done by the addMapper for the last mapper in the chain. @param job chain job's JobConf to add the Mapper class. @param klass the Mapper class to add. @param inputKeyClass mapper input key class. @param inputValueClass mapper input value class. @param outputKeyClass mapper output key class. @param outputValueClass mapper output value class. @param byValue indicates if key/values should be passed by value to the next Mapper in the chain, if any. @param mapperConf a JobConf with the configuration for the Mapper class. It is recommended to use a JobConf without default values using the <code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Configures the ChainReducer, the Reducer and all the Mappers in the chain. <p/> If this method is overridden <code>super.configure(...)</code> should be invoked at the beginning of the overriding method.]]> </doc> </method> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <param name="values" type="java.util.Iterator"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Chains the <code>reduce(...)</code> method of the Reducer with the <code>map(...) 
</code> methods of the Mappers in the chain.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Closes the ChainReducer, the Reducer and all the Mappers in the chain. <p/> If this method is overridden <code>super.close()</code> should be invoked at the end of the overriding method.]]> </doc> </method> <doc> <![CDATA[The ChainReducer class allows to chain multiple Mapper classes after a Reducer within the Reducer task. <p/> For each record output by the Reducer, the Mapper classes are invoked in a chained (or piped) fashion, the output of the first becomes the input of the second, and so on until the last Mapper, the output of the last Mapper will be written to the task's output. <p/> The key functionality of this feature is that the Mappers in the chain do not need to be aware that they are executed after the Reducer or in a chain. This enables having reusable specialized Mappers that can be combined to perform composite operations within a single task. <p/> Special care has to be taken when creating chains that the key/values output by a Mapper are valid for the following Mapper in the chain. It is assumed all Mappers and the Reduce in the chain use matching output and input key and value classes as no conversion is done by the chaining code. <p/> Using the ChainMapper and the ChainReducer classes it is possible to compose Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An immediate benefit of this pattern is a dramatic reduction in disk IO. <p/> IMPORTANT: There is no need to specify the output key/value classes for the ChainReducer, this is done by the setReducer or the addMapper for the last element in the chain. <p/> ChainReducer usage pattern: <p/> <pre> ... 
conf.setJobName("chain"); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); <p/> JobConf mapAConf = new JobConf(false); ... ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class, Text.class, Text.class, true, mapAConf); <p/> JobConf mapBConf = new JobConf(false); ... ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class, LongWritable.class, Text.class, false, mapBConf); <p/> JobConf reduceConf = new JobConf(false); ... ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class, Text.class, Text.class, true, reduceConf); <p/> ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class, LongWritable.class, Text.class, false, null); <p/> ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class, LongWritable.class, LongWritable.class, true, null); <p/> FileInputFormat.setInputPaths(conf, inDir); FileOutputFormat.setOutputPath(conf, outDir); ... <p/> JobClient jc = new JobClient(conf); RunningJob job = jc.submitJob(conf); ... 
</pre> @deprecated Use {@link org.apache.hadoop.mapreduce.lib.chain.ChainReducer} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.ChainReducer --> <!-- start class org.apache.hadoop.mapred.lib.CombineFileInputFormat --> <class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat}"> <implements name="org.apache.hadoop.mapred.InputFormat"/> <constructor name="CombineFileInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[default constructor]]> </doc> </constructor> <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="numSplits" type="int"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createPool" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="Use {@link #createPool(List)}."> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="filters" type="java.util.List"/> <doc> <![CDATA[Create a new pool and add the filters to it. A split cannot have files from different pools. @deprecated Use {@link #createPool(List)}.]]> </doc> </method> <method name="createPool" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="Use {@link #createPool(PathFilter...)}."> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/> <doc> <![CDATA[Create a new pool and add the filters to it. A pathname can satisfy any one of the specified filters. 
A split cannot have files from different pools. @deprecated Use {@link #createPool(PathFilter...)}.]]> </doc> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This is not implemented yet.]]> </doc> </method> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An abstract {@link org.apache.hadoop.mapred.InputFormat} that returns {@link CombineFileSplit}'s in {@link org.apache.hadoop.mapred.InputFormat#getSplits(JobConf, int)} method. Splits are constructed from the files under the input paths. A split cannot have files from different pools. Each split returned may contain blocks from different files. If a maxSplitSize is specified, then blocks on the same node are combined to form a single split. Blocks that are left over are then combined with other blocks in the same rack. If maxSplitSize is not specified, then blocks from the same rack are combined in a single split; no attempt is made to create node-local splits. If the maxSplitSize is equal to the block size, then this class is similar to the default splitting behaviour in Hadoop: each block is a locally processed split. 
Subclasses implement {@link org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit, JobConf, Reporter)} to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s. @see CombineFileSplit @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat}]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.CombineFileInputFormat --> <!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReader --> <class name="CombineFileRecordReader" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader}"> <implements name="org.apache.hadoop.mapred.RecordReader"/> <constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.mapred.Reporter, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[A generic RecordReader that can hand out different recordReaders for each chunk in the CombineFileSplit.]]> </doc> </constructor> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="createValue" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" 
deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[return the amount of data processed]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[return progress based on the amount of data processed so far.]]> </doc> </method> <method name="initNextRecordReader" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]> </doc> </method> <field name="split" type="org.apache.hadoop.mapred.lib.CombineFileSplit" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="jc" type="org.apache.hadoop.mapred.JobConf" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="reporter" type="org.apache.hadoop.mapred.Reporter" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="rrClass" type="java.lang.Class" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="rrConstructor" type="java.lang.reflect.Constructor" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field 
name="fs" type="org.apache.hadoop.fs.FileSystem" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="idx" type="int" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="progress" type="long" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="curReader" type="org.apache.hadoop.mapred.RecordReader" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[A generic RecordReader that can hand out different recordReaders for each chunk in a {@link CombineFileSplit}. A CombineFileSplit can combine data chunks from multiple files. This class allows using different RecordReaders for processing these data chunks from different files. @see CombineFileSplit @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader}]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReader --> <!-- start class org.apache.hadoop.mapred.lib.CombineFileSplit --> <class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.CombineFileSplit}"> <implements name="org.apache.hadoop.mapred.InputSplit"/> <constructor name="CombineFileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, 
org.apache.hadoop.fs.Path[], long[]" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.lib.CombineFileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Copy constructor]]> </doc> </constructor> <method name="getJob" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.CombineFileSplit}]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.CombineFileSplit --> <!-- start class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce --> <class name="FieldSelectionMapReduce" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link FieldSelectionMapper} and {@link FieldSelectionReducer} instead"> <implements name="org.apache.hadoop.mapred.Mapper"/> <implements name="org.apache.hadoop.mapred.Reducer"/> <constructor name="FieldSelectionMapReduce" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="val" type="V"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[The identity function. 
Input key/value pair is written directly to output.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="values" type="java.util.Iterator"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements a mapper/reducer class that can be used to perform field selections in a manner similar to unix cut. The input data is treated as fields separated by a user specified separator (the default value is "\t"). The user can specify a list of fields that form the map output keys, and a list of fields that form the map output values. If the inputformat is TextInputFormat, the mapper will ignore the key to the map function, and the fields are from the value only. Otherwise, the fields are the union of those from the key and those from the value. The field separator is under attribute "mapreduce.fieldsel.data.field.separator" The map output field list spec is under attribute "mapreduce.fieldsel.map.output.key.value.fields.spec". 
The value is expected to be like "keyFieldsSpec:valueFieldsSpec" key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ... Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range (like 2-5) to specify a range of fields, or an open range (like 3-) specifying all the fields starting from field 3. The open range field spec applies value fields only. They have no effect on the key fields. Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values. The reduce output field list spec is under attribute "mapreduce.fieldsel.reduce.output.key.value.fields.spec". The reducer extracts output key/value pairs in a similar manner, except that the key is never ignored. @deprecated Use {@link FieldSelectionMapper} and {@link FieldSelectionReducer} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce --> <!-- start class org.apache.hadoop.mapred.lib.FilterOutputFormat --> <class name="FilterOutputFormat" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat} instead."> <implements name="org.apache.hadoop.mapred.OutputFormat"/> <constructor name="FilterOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="FilterOutputFormat" type="org.apache.hadoop.mapred.OutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a FilterOutputFormat based on the supplied output format. 
@param out the underlying OutputFormat]]> </doc> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="baseOut" type="org.apache.hadoop.mapred.OutputFormat" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.FilterOutputFormat --> <!-- start class org.apache.hadoop.mapred.lib.FilterOutputFormat.FilterRecordWriter --> <class name="FilterOutputFormat.FilterRecordWriter" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.RecordWriter"/> <constructor name="FilterOutputFormat.FilterRecordWriter" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <constructor name="FilterOutputFormat.FilterRecordWriter" type="org.apache.hadoop.mapred.RecordWriter" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="rawWriter" type="org.apache.hadoop.mapred.RecordWriter" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[<code>FilterRecordWriter</code> is a convenience wrapper class that implements {@link RecordWriter}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.FilterOutputFormat.FilterRecordWriter --> <!-- start class 
org.apache.hadoop.mapred.lib.HashPartitioner --> <class name="HashPartitioner" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead."> <implements name="org.apache.hadoop.mapred.Partitioner"/> <constructor name="HashPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="getPartition" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K2"/> <param name="value" type="V2"/> <param name="numReduceTasks" type="int"/> <doc> <![CDATA[Use {@link Object#hashCode()} to partition.]]> </doc> </method> <doc> <![CDATA[Partition keys by their {@link Object#hashCode()}. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.HashPartitioner --> <!-- start class org.apache.hadoop.mapred.lib.IdentityMapper --> <class name="IdentityMapper" extends="org.apache.hadoop.mapred.MapReduceBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead."> <implements name="org.apache.hadoop.mapred.Mapper"/> <constructor name="IdentityMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="val" type="V"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[The identity function. Input key/value pair is written directly to output.]]> </doc> </method> <doc> <![CDATA[Implements the identity function, mapping inputs directly to outputs. 
@deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.IdentityMapper --> <!-- start class org.apache.hadoop.mapred.lib.IdentityReducer --> <class name="IdentityReducer" extends="org.apache.hadoop.mapred.MapReduceBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead."> <implements name="org.apache.hadoop.mapred.Reducer"/> <constructor name="IdentityReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="values" type="java.util.Iterator"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Writes all keys and values directly to output.]]> </doc> </method> <doc> <![CDATA[Performs no reduction, writing all input values directly to the output. 
@deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.IdentityReducer --> <!-- start class org.apache.hadoop.mapred.lib.InputSampler --> <class name="InputSampler" extends="org.apache.hadoop.mapreduce.lib.partition.InputSampler" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.partition.InputSampler}"> <constructor name="InputSampler" type="org.apache.hadoop.mapred.JobConf" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="writePartitionFile" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="sampler" type="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/> <exception name="IOException" type="java.io.IOException"/> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.lib.partition.InputSampler}]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.InputSampler --> <!-- start class org.apache.hadoop.mapred.lib.InverseMapper --> <class name="InverseMapper" extends="org.apache.hadoop.mapred.MapReduceBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper} instead."> <implements name="org.apache.hadoop.mapred.Mapper"/> <constructor name="InverseMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> 
<param name="value" type="V"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[The inverse function. Input keys and values are swapped.]]> </doc> </method> <doc> <![CDATA[A {@link Mapper} that swaps keys and values. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.InverseMapper --> <!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator --> <class name="KeyFieldBasedComparator" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator} instead"> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <constructor name="KeyFieldBasedComparator" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <doc> <![CDATA[This comparator implementation provides a subset of the features provided by the Unix/GNU Sort. In particular, the supported features are: -n, (Sort numerically) -r, (Reverse the result of comparison) -k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number of the field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field). 
opts are ordering options (any of 'nr' as described above). We assume that the fields in the key are separated by {@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPERATOR} @deprecated Use {@link org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator --> <!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner --> <class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner} instead"> <implements name="org.apache.hadoop.mapred.Partitioner"/> <constructor name="KeyFieldBasedPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <doc> <![CDATA[Defines a way to partition keys based on certain key fields (also see {@link KeyFieldBasedComparator}). The key specification supported is of the form -k pos1[,pos2], where, pos is of the form f[.c][opts], where f is the number of the key field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field). 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner --> <!-- start class org.apache.hadoop.mapred.lib.LazyOutputFormat --> <class name="LazyOutputFormat" extends="org.apache.hadoop.mapred.lib.FilterOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat} instead."> <constructor name="LazyOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setOutputFormatClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the underlying output format for LazyOutputFormat. @param job the {@link JobConf} to modify @param theClass the underlying class]]> </doc> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A Convenience class that creates output 
lazily. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.LazyOutputFormat --> <!-- start class org.apache.hadoop.mapred.lib.LongSumReducer --> <class name="LongSumReducer" extends="org.apache.hadoop.mapred.MapReduceBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer} instead."> <implements name="org.apache.hadoop.mapred.Reducer"/> <constructor name="LongSumReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="values" type="java.util.Iterator"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A {@link Reducer} that sums long values. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.LongSumReducer --> <!-- start class org.apache.hadoop.mapred.lib.MultipleInputs --> <class name="MultipleInputs" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.MultipleInputs} instead"> <constructor name="MultipleInputs" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="addInputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="inputFormatClass" type="java.lang.Class"/> <doc> <![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of inputs for the map-reduce job. @param conf The configuration of the job @param path {@link Path} to be added to the list of inputs for the job @param inputFormatClass {@link InputFormat} class to use for this path]]> </doc> </method> <method name="addInputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="inputFormatClass" type="java.lang.Class"/> <param name="mapperClass" type="java.lang.Class"/> <doc> <![CDATA[Add a {@link Path} with a custom {@link InputFormat} and {@link Mapper} to the list of inputs for the map-reduce job. 
@param conf The configuration of the job @param path {@link Path} to be added to the list of inputs for the job @param inputFormatClass {@link InputFormat} class to use for this path @param mapperClass {@link Mapper} class to use for this path]]> </doc> </method> <doc> <![CDATA[This class supports MapReduce jobs that have multiple input paths with a different {@link InputFormat} and {@link Mapper} for each path @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.MultipleInputs} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.MultipleInputs --> <!-- start class org.apache.hadoop.mapred.lib.MultipleOutputFormat --> <class name="MultipleOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead"> <constructor name="MultipleOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="arg3" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create a composite record writer that can write key/value data to different output files @param fs the file system to use @param job the job conf for the job @param name the leaf file name for the output file (such as "part-00000") @param arg3 a progressable for reporting progress. 
@return a composite record writer @throws IOException]]> </doc> </method> <method name="generateLeafFileName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Generate the leaf name for the output file name. The default behavior does not change the leaf file name (such as part-00000) @param name the leaf file name for the output file @return the given leaf file name]]> </doc> </method> <method name="generateFileNameForKeyValue" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Generate the file output file name based on the given key and the leaf file name. The default behavior is that the file name does not depend on the key. @param key the key of the output data @param name the leaf file name @return generated file name]]> </doc> </method> <method name="generateActualKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <doc> <![CDATA[Generate the actual key from the given key/value. The default behavior is that the actual key is equal to the given key @param key the key of the output data @param value the value of the output data @return the actual key derived from the given key/value]]> </doc> </method> <method name="generateActualValue" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <doc> <![CDATA[Generate the actual value from the given key and value. 
The default behavior is that the actual value is equal to the given value @param key the key of the output data @param value the value of the output data @return the actual value derived from the given key/value]]> </doc> </method> <method name="getInputFileBasedOutputFileName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Generate the outfile name based on a given name and the input file name. If the {@link JobContext#MAP_INPUT_FILE} does not exist (i.e. this is not for a map only job), the given name is returned unchanged. If the config value for "num.of.trailing.legs.to.use" is not set, or set 0 or negative, the given name is returned unchanged. Otherwise, return a file name consisting of the N trailing legs of the input file name where N is the config value for "num.of.trailing.legs.to.use". 
@param job the job config @param name the output file name @return the outfile name based on a given name and the input file name.]]> </doc> </method> <method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="arg3" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@param fs the file system to use @param job a job conf object @param name the name of the file over which a record writer object will be constructed @param arg3 a progressable object @return A RecordWriter object over the given file @throws IOException]]> </doc> </method> <doc> <![CDATA[This abstract class extends the FileOutputFormat, allowing to write the output data to different output files. There are three basic use cases for this class. Case one: This class is used for a map reduce job with at least one reducer. The reducer wants to write data to different files depending on the actual keys. It is assumed that a key (or value) encodes the actual key (value) and the desired location for the actual key (value). Case two: This class is used for a map only job. The job wants to use an output file name that is either a part of the input file name of the input data, or some derivation of it. Case three: This class is used for a map only job. 
The job wants to use an output file name that depends on both the keys and the input file name, @deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.MultipleOutputFormat --> <!-- start class org.apache.hadoop.mapred.lib.MultipleOutputs --> <class name="MultipleOutputs" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead"> <constructor name="MultipleOutputs" type="org.apache.hadoop.mapred.JobConf" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Creates and initializes multiple named outputs support, it should be instantiated in the Mapper/Reducer configure method. @param job the job configuration object]]> </doc> </constructor> <method name="getNamedOutputsList" return="java.util.List" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Returns list of channel names. @param conf job conf @return List of channel Names]]> </doc> </method> <method name="isMultiNamedOutput" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="namedOutput" type="java.lang.String"/> <doc> <![CDATA[Returns if a named output is multiple. @param conf job conf @param namedOutput named output @return <code>true</code> if the name output is multi, <code>false</code> if it is single. 
If the name output is not defined it returns <code>false</code>]]> </doc> </method> <method name="getNamedOutputFormatClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="namedOutput" type="java.lang.String"/> <doc> <![CDATA[Returns the named output OutputFormat. @param conf job conf @param namedOutput named output @return namedOutput OutputFormat]]> </doc> </method> <method name="getNamedOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="namedOutput" type="java.lang.String"/> <doc> <![CDATA[Returns the key class for a named output. @param conf job conf @param namedOutput named output @return class for the named output key]]> </doc> </method> <method name="getNamedOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="namedOutput" type="java.lang.String"/> <doc> <![CDATA[Returns the value class for a named output. @param conf job conf @param namedOutput named output @return class of named output value]]> </doc> </method> <method name="addNamedOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="namedOutput" type="java.lang.String"/> <param name="outputFormatClass" type="java.lang.Class"/> <param name="keyClass" type="java.lang.Class"/> <param name="valueClass" type="java.lang.Class"/> <doc> <![CDATA[Adds a named output for the job. 
<p/> @param conf job conf to add the named output @param namedOutput named output name, it has to be a word, letters and numbers only, cannot be the word 'part' as that is reserved for the default output. @param outputFormatClass OutputFormat class. @param keyClass key class @param valueClass value class]]> </doc> </method> <method name="addMultiNamedOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="namedOutput" type="java.lang.String"/> <param name="outputFormatClass" type="java.lang.Class"/> <param name="keyClass" type="java.lang.Class"/> <param name="valueClass" type="java.lang.Class"/> <doc> <![CDATA[Adds a multi named output for the job. <p/> @param conf job conf to add the named output @param namedOutput named output name, it has to be a word, letters and numbers only, cannot be the word 'part' as that is reserved for the default output. @param outputFormatClass OutputFormat class. @param keyClass key class @param valueClass value class]]> </doc> </method> <method name="setCountersEnabled" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="enabled" type="boolean"/> <doc> <![CDATA[Enables or disables counters for the named outputs. <p/> By default these counters are disabled. <p/> MultipleOutputs supports counters, by default they are disabled. The counters group is the {@link MultipleOutputs} class name. <p/> The names of the counters are the same as the named outputs. For multi named outputs the name of the counter is the concatenation of the named output, an underscore '_' and the multiname. @param conf job conf in which to enable or disable the counters. 
@param enabled indicates if the counters will be enabled or not.]]> </doc> </method> <method name="getCountersEnabled" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Returns if the counters for the named outputs are enabled or not. <p/> By default these counters are disabled. <p/> MultipleOutputs supports counters, by default they are disabled. The counters group is the {@link MultipleOutputs} class name. <p/> The names of the counters are the same as the named outputs. For multi named outputs the name of the counter is the concatenation of the named output, an underscore '_' and the multiname. @param conf job conf to read the counters setting from. @return TRUE if the counters are enabled, FALSE if they are disabled.]]> </doc> </method> <method name="getNamedOutputs" return="java.util.Iterator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns iterator with the defined name outputs. @return iterator with the defined named outputs]]> </doc> </method> <method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="namedOutput" type="java.lang.String"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Gets the output collector for a named output. 
<p/> @param namedOutput the named output name @param reporter the reporter @return the output collector for the given named output @throws IOException thrown if output collector could not be created]]> </doc> </method> <method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="namedOutput" type="java.lang.String"/> <param name="multiName" type="java.lang.String"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Gets the output collector for a multi named output. <p/> @param namedOutput the named output name @param multiName the multi name part @param reporter the reporter @return the output collector for the given named output @throws IOException thrown if output collector could not be created]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Closes all the opened named outputs. <p/> If overridden, subclasses must invoke <code>super.close()</code> at the end of their <code>close()</code> @throws java.io.IOException thrown if any of the MultipleOutput files could not be closed properly.]]> </doc> </method> <doc> <![CDATA[The MultipleOutputs class simplifies writing to additional outputs other than the job default output via the <code>OutputCollector</code> passed to the <code>map()</code> and <code>reduce()</code> methods of the <code>Mapper</code> and <code>Reducer</code> implementations. <p/> Each additional output, or named output, may be configured with its own <code>OutputFormat</code>, with its own key class and with its own value class. <p/> A named output can be a single file or a multi file. 
The latter is referred to as a multi named output. <p/> A multi named output is an unbounded set of files all sharing the same <code>OutputFormat</code>, key class and value class configuration. <p/> When named outputs are used within a <code>Mapper</code> implementation, key/values written to a named output are not part of the reduce phase, only key/values written to the job <code>OutputCollector</code> are part of the reduce phase. <p/> MultipleOutputs supports counters, by default they are disabled. The counters group is the {@link MultipleOutputs} class name. <p/> The names of the counters are the same as the named outputs. For multi named outputs the name of the counter is the concatenation of the named output, an underscore '_' and the multiname. <p/> Job configuration usage pattern is: <pre> JobConf conf = new JobConf(); conf.setInputPath(inDir); FileOutputFormat.setOutputPath(conf, outDir); conf.setMapperClass(MOMap.class); conf.setReducerClass(MOReduce.class); ... // Defines additional single text based output 'text' for the job MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class, LongWritable.class, Text.class); // Defines additional multi sequencefile based output 'seq' for the // job MultipleOutputs.addMultiNamedOutput(conf, "seq", SequenceFileOutputFormat.class, LongWritable.class, Text.class); ... JobClient jc = new JobClient(); RunningJob job = jc.submitJob(conf); ... </pre> <p/> Reducer usage pattern is: <pre> public class MOReduce implements Reducer<WritableComparable, Writable> { private MultipleOutputs mos; public void configure(JobConf conf) { ... mos = new MultipleOutputs(conf); } public void reduce(WritableComparable key, Iterator<Writable> values, OutputCollector output, Reporter reporter) throws IOException { ... 
mos.getCollector("text", reporter).collect(key, new Text("Hello")); mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye")); mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau")); ... } public void close() throws IOException { mos.close(); ... } } </pre> @deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.MultipleOutputs --> <!-- start class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat --> <class name="MultipleSequenceFileOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead"> <constructor name="MultipleSequenceFileOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="arg3" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[This class extends the MultipleOutputFormat, allowing to write the output data to different output files in sequence file output format. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat --> <!-- start class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat --> <class name="MultipleTextOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead"> <constructor name="MultipleTextOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="fs" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="arg3" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[This class extends the MultipleOutputFormat, allowing to write the output data to different output files in Text output format. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.MultipleOutputs} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat --> <!-- start class org.apache.hadoop.mapred.lib.MultithreadedMapRunner --> <class name="MultithreadedMapRunner" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link MultithreadedMapper} instead."> <implements name="org.apache.hadoop.mapred.MapRunnable"/> <constructor name="MultithreadedMapRunner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="input" type="org.apache.hadoop.mapred.RecordReader"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapred.MapRunnable}. <p> It can be used instead of the default implementation, {@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU bound in order to improve throughput. <p> Map implementations using this MapRunnable must be thread-safe. <p> The Map-Reduce job has to be configured to use this MapRunnable class (using the JobConf.setMapRunnerClass method) and the number of threads the thread-pool can use with the <code>mapred.map.multithreadedrunner.threads</code> property, its default value is 10 threads.
<p> @deprecated Use {@link MultithreadedMapper} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.MultithreadedMapRunner --> <!-- start class org.apache.hadoop.mapred.lib.NLineInputFormat --> <class name="NLineInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.NLineInputFormat} instead"> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <constructor name="NLineInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="numSplits" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Logically splits the set of input files for the job, splits N lines of the input as one split. @see org.apache.hadoop.mapred.FileInputFormat#getSplits(JobConf, int)]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> </method> <doc> <![CDATA[NLineInputFormat which splits N lines of input as one split. 
In many "pleasantly" parallel applications, each process/mapper processes the same input file(s), but the computations are controlled by different parameters (referred to as "parameter sweeps"). One way to achieve this is to specify a set of parameters (one set per line) as input in a control file (which is the input path to the map-reduce application, whereas the input dataset is specified via a config variable in JobConf). The NLineInputFormat can be used in such applications; it splits the input file such that, by default, one line is fed as a value to one map task, and the key is the offset, i.e. (k,v) is (LongWritable, Text). The location hints will span the whole mapred cluster. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.NLineInputFormat} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.NLineInputFormat --> <!-- start class org.apache.hadoop.mapred.lib.NullOutputFormat --> <class name="NullOutputFormat" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.output.NullOutputFormat} instead."> <implements name="org.apache.hadoop.mapred.OutputFormat"/> <constructor name="NullOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ignored"
type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <doc> <![CDATA[Consume all outputs and put them in /dev/null. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.output.NullOutputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.NullOutputFormat --> <!-- start class org.apache.hadoop.mapred.lib.RegexMapper --> <class name="RegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.map.RegexMapper}"> <implements name="org.apache.hadoop.mapred.Mapper"/> <constructor name="RegexMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="org.apache.hadoop.io.Text"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A {@link Mapper} that extracts text matching a regular expression. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.RegexMapper}]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.RegexMapper --> <!-- start class org.apache.hadoop.mapred.lib.TokenCountMapper --> <class name="TokenCountMapper" extends="org.apache.hadoop.mapred.MapReduceBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead."> <implements name="org.apache.hadoop.mapred.Mapper"/> <constructor name="TokenCountMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="org.apache.hadoop.io.Text"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A {@link Mapper} that maps text values into <token,freq> pairs. Uses {@link StringTokenizer} to break text into tokens. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.TokenCountMapper --> <!-- start class org.apache.hadoop.mapred.lib.TotalOrderPartitioner --> <class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner}"> <implements name="org.apache.hadoop.mapred.Partitioner"/> <constructor name="TotalOrderPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <doc> <![CDATA[Partitioner effecting a total order by reading split points from an externally generated source. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner}]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.TotalOrderPartitioner --> </package> <package name="org.apache.hadoop.mapred.lib.aggregate"> <!-- start class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum --> <class name="DoubleValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="DoubleValueSum" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class implements a value aggregator that sums up a sequence of double values. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMax --> <class name="LongValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="LongValueMax" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class implements a value aggregator that maintain the maximum of a sequence of long values. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMax --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMin --> <class name="LongValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="LongValueMin" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class implements a value aggregator that maintain the minimum of a sequence of long values. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMin --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueSum --> <class name="LongValueSum" extends="org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="LongValueSum" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class implements a value aggregator that sums up a sequence of long values. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueSum --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMax --> <class name="StringValueMax" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="StringValueMax" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class implements a value aggregator that maintain the biggest of a sequence of strings. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMax --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMin --> <class name="StringValueMin" extends="org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="StringValueMin" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class implements a value aggregator that maintain the smallest of a sequence of strings. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMin --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount --> <class name="UniqValueCount" extends="org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="UniqValueCount" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[the default constructor]]> </doc> </constructor> <constructor name="UniqValueCount" type="long" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[constructor @param maxNum the limit in the number of unique values to keep.]]> </doc> </constructor> <doc> <![CDATA[This class implements a value aggregator that dedupes a sequence of objects. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor --> <class name="UserDefinedValueAggregatorDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/> <constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.mapred.JobConf" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param className the class name of the user defined descriptor class @param job a configuration object used for descriptor configuration]]> </doc> </constructor> <method name="createInstance" return="java.lang.Object" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="className" type="java.lang.String"/> <doc> <![CDATA[Create an instance of the given class @param className the name of the class @return a dynamically created instance of the given class]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Do nothing.]]> </doc> </method> <doc> <![CDATA[This class implements a wrapper for a user defined value aggregator descriptor. It serves two functions: One is to create an object of ValueAggregatorDescriptor from the name of a user defined class that may be dynamically loaded.
The other is to delegate invocations of the generateKeyValPairs function to the created object. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor --> <!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator --> <interface name="ValueAggregator" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator} instead"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <doc> <![CDATA[This interface defines the minimal protocol for value aggregators. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator} instead]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor --> <class name="ValueAggregatorBaseDescriptor" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/> <constructor name="ValueAggregatorBaseDescriptor" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="generateEntry" return="java.util.Map.Entry" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="type" type="java.lang.String"/> <param name="id" type="java.lang.String"/> <param name="val" type="org.apache.hadoop.io.Text"/> <doc> <![CDATA[@param type the aggregation type @param id the aggregation id @param val the val associated with
the id to be aggregated @return an Entry whose key is the aggregation id prefixed with the aggregation type.]]> </doc> </method> <method name="generateValueAggregator" return="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="type" type="java.lang.String"/> <doc> <![CDATA[@param type the aggregation type @return a value aggregator of the given type.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[get the input file name. @param job a job configuration object]]> </doc> </method> <field name="UNIQ_VALUE_COUNT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="LONG_VALUE_SUM" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="DOUBLE_VALUE_SUM" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="VALUE_HISTOGRAM" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="LONG_VALUE_MAX" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="LONG_VALUE_MIN" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="STRING_VALUE_MAX" type="java.lang.String" transient="false" volatile="false" static="true" final="true" 
visibility="public" deprecated="not deprecated"> </field> <field name="STRING_VALUE_MIN" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements the common functionalities of the subclasses of ValueAggregatorDescriptor class. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner --> <class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner} instead"> <constructor name="ValueAggregatorCombiner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Combiner does not need to configure.]]> </doc> </method> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="values" type="java.util.Iterator"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Combines values for a given key. @param key the key is expected to be a Text object, whose prefix indicates the type of aggregation to aggregate the values. 
@param values the values to combine @param output to collect combined values]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Do nothing.]]> </doc> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="arg0" type="K1"/> <param name="arg1" type="V1"/> <param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="arg3" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Do nothing. Should not be called.]]> </doc> </method> <doc> <![CDATA[This class implements the generic combiner of Aggregate. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner --> <!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor --> <interface name="ValueAggregatorDescriptor" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor} instead"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Configure the object @param job a JobConf object that may contain the information that can be used to configure the object.]]> </doc> </method> <field name="TYPE_SEPARATOR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" 
deprecated="not deprecated"> </field> <field name="ONE" type="org.apache.hadoop.io.Text" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This interface defines the contract a value aggregator descriptor must support. Such a descriptor can be configured with a JobConf object. Its main function is to generate a list of aggregation-id/value pairs. An aggregation id encodes an aggregation type which is used to guide the way to aggregate the value in the reduce/combiner phase of an Aggregate based job. The mapper in an Aggregate based map/reduce job may create one or more of ValueAggregatorDescriptor objects at configuration time. For each input key/value pair, the mapper will use those objects to create aggregation id/value pairs. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor} instead]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob --> <class name="ValueAggregatorJob" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob} instead"> <constructor name="ValueAggregatorJob" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <param name="descriptors" type="java.lang.Class[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl" abstract="false" native="false"
synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create an Aggregate based map/reduce job. @param args the arguments used for job creation. Generic hadoop arguments are accepted. @return a JobConf object ready for submission. @throws IOException @see GenericOptionsParser]]> </doc> </method> <method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <param name="descriptors" type="java.lang.Class[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setAggregatorDescriptors" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="descriptors" type="java.lang.Class[]"/> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[create and run an Aggregate based map/reduce job. @param args the arguments used for job creation @throws IOException]]> </doc> </method> <doc> <![CDATA[This is the main class for creating a map/reduce job using Aggregate framework. 
The Aggregate is a specialization of map/reduce framework, specializing for performing various simple aggregations. Generally speaking, in order to implement an application using Map/Reduce model, the developer is to implement Map and Reduce functions (and possibly combine function). However, a lot of applications related to counting and statistics computing have very similar characteristics. Aggregate abstracts out the general patterns of these functions and implements those patterns. In particular, the package provides generic mapper/reducer/combiner classes, and a set of built-in value aggregators, and a generic utility class that helps user create map/reduce jobs using the generic class. The built-in aggregators include: sum over numeric values count the number of distinct values compute the histogram of values compute the minimum, maximum, median, average, standard deviation of numeric values The developer using Aggregate will need only to provide a plugin class conforming to the following interface: public interface ValueAggregatorDescriptor { public ArrayList<Entry> generateKeyValPairs(Object key, Object value); public void configure(JobConf job); } The package also provides a base class, ValueAggregatorBaseDescriptor, implementing the above interface. The user can extend the base class and implement generateKeyValPairs accordingly. The primary work of generateKeyValPairs is to emit one or more key/value pairs based on the input key/value pair. The key in an output key/value pair encodes two pieces of information: aggregation type and aggregation id. The value will be aggregated onto the aggregation id according to the aggregation type. This class offers a function to generate a map/reduce job using Aggregate framework.
The function takes the following parameters: input directory spec input format (text or sequence file) output directory a file specifying the user plugin class @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase --> <class name="ValueAggregatorJobBase" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase} instead"> <implements name="org.apache.hadoop.mapred.Mapper"/> <implements name="org.apache.hadoop.mapred.Reducer"/> <constructor name="ValueAggregatorJobBase" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="logSpec" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <field name="aggregatorDescriptorList" type="java.util.ArrayList" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[This abstract class implements some common functionalities of the generic mapper, reducer and combiner classes of Aggregate.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper --> <class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper} instead"> <constructor name="ValueAggregatorMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K1"/> <param name="value" type="V1"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[the map function. It iterates through the value aggregator descriptor list to generate aggregation id/value pairs and emit them.]]> </doc> </method> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="arg0" type="org.apache.hadoop.io.Text"/> <param name="arg1" type="java.util.Iterator"/> <param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="arg3" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Do nothing. Should not be called.]]> </doc> </method> <doc> <![CDATA[This class implements the generic mapper of Aggregate. 
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer --> <class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer} instead"> <constructor name="ValueAggregatorReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="values" type="java.util.Iterator"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@param key the key is expected to be a Text object, whose prefix indicates the type of aggregation to aggregate the values. In effect, data driven computing is achieved. It is assumed that each aggregator's getReport method emits appropriate output for the aggregator. This may be further customized. @param values the values to be aggregated]]> </doc> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="arg0" type="K1"/> <param name="arg1" type="V1"/> <param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="arg3" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Do nothing. 
Should not be called]]> </doc> </method> <doc> <![CDATA[This class implements the generic reducer of Aggregate. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer --> <!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram --> <class name="ValueHistogram" extends="org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram} instead"> <implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/> <constructor name="ValueHistogram" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[This class implements a value aggregator that computes the histogram of a sequence of strings. @deprecated Use {@link org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram --> </package> <package name="org.apache.hadoop.mapred.lib.db"> <!-- start class org.apache.hadoop.mapred.lib.db.DBConfiguration --> <class name="DBConfiguration" extends="org.apache.hadoop.mapreduce.lib.db.DBConfiguration" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.db.DBConfiguration} instead"> <method name="configureDB" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="driverClass" type="java.lang.String"/> <param name="dbUrl" type="java.lang.String"/> <param name="userName" type="java.lang.String"/> <param name="passwd" type="java.lang.String"/> <doc> <![CDATA[Sets the DB access related fields in the JobConf. 
@param job the job @param driverClass JDBC Driver class name @param dbUrl JDBC DB access URL. @param userName DB access username @param passwd DB access passwd]]> </doc> </method> <method name="configureDB" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="driverClass" type="java.lang.String"/> <param name="dbUrl" type="java.lang.String"/> <doc> <![CDATA[Sets the DB access related fields in the JobConf. @param job the job @param driverClass JDBC Driver class name @param dbUrl JDBC DB access URL.]]> </doc> </method> <field name="DRIVER_CLASS_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The JDBC Driver class name]]> </doc> </field> <field name="URL_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[JDBC Database access URL]]> </doc> </field> <field name="USERNAME_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[User name to access the database]]> </doc> </field> <field name="PASSWORD_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Password to access the database]]> </doc> </field> <field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Input table name]]> </doc> </field> <field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> 
<doc> <![CDATA[Field names in the Input table]]> </doc> </field> <field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[WHERE clause in the input SELECT statement]]> </doc> </field> <field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[ORDER BY clause in the input SELECT statement]]> </doc> </field> <field name="INPUT_QUERY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Whole input query, excluding LIMIT...OFFSET]]> </doc> </field> <field name="INPUT_COUNT_QUERY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Input query to get the count of records]]> </doc> </field> <field name="INPUT_CLASS_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Class name implementing DBWritable which will hold input tuples]]> </doc> </field> <field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Output table name]]> </doc> </field> <field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Field names in the Output table]]> </doc> </field> <field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Number of fields in 
the Output table]]> </doc> </field> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.lib.db.DBConfiguration} instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.db.DBConfiguration --> <!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat --> <class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.db.DBInputFormat} instead."> <implements name="org.apache.hadoop.mapred.InputFormat"/> <implements name="org.apache.hadoop.mapred.JobConfigurable"/> <constructor name="DBInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapred.InputSplit"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="chunks" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="setInput" abstract="false" 
native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="inputClass" type="java.lang.Class"/> <param name="tableName" type="java.lang.String"/> <param name="conditions" type="java.lang.String"/> <param name="orderBy" type="java.lang.String"/> <param name="fieldNames" type="java.lang.String[]"/> <doc> <![CDATA[Initializes the map-part of the job with the appropriate input settings. @param job The job @param inputClass the class object implementing DBWritable, which is the Java object holding tuple fields. @param tableName The table to read data from @param conditions The condition which to select data with, eg. '(updated > 20070101 AND length > 0)' @param orderBy the fieldNames in the orderBy clause. @param fieldNames The field names in the table @see #setInput(JobConf, Class, String, String)]]> </doc> </method> <method name="setInput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="inputClass" type="java.lang.Class"/> <param name="inputQuery" type="java.lang.String"/> <param name="inputCountQuery" type="java.lang.String"/> <doc> <![CDATA[Initializes the map-part of the job with the appropriate input settings. @param job The job @param inputClass the class object implementing DBWritable, which is the Java object holding tuple fields. @param inputQuery the input query to select fields. Example : "SELECT f1, f2, f3 FROM Mytable ORDER BY f1" @param inputCountQuery the input query that returns the number of records in the table. 
Example : "SELECT COUNT(f1) FROM Mytable" @see #setInput(JobConf, Class, String, String, String, String...)]]> </doc> </method> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.lib.db.DBInputFormat} instead.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat --> <!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit --> <class name="DBInputFormat.DBInputSplit" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit" abstract="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.InputSplit"/> <constructor name="DBInputFormat.DBInputSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default Constructor]]> </doc> </constructor> <constructor name="DBInputFormat.DBInputSplit" type="long, long" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Convenience Constructor @param start the index of the first row to select @param end the index of the last row to select]]> </doc> </constructor> <doc> <![CDATA[An InputSplit that spans a set of rows]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit --> <!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader --> <class name="DBInputFormat.DBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader" abstract="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.RecordReader"/> <constructor name="DBInputFormat.DBRecordReader" type="org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.mapred.JobConf, java.sql.Connection, org.apache.hadoop.mapred.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" static="false" final="false" visibility="protected" deprecated="not 
deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[@param split The InputSplit to read data for @throws SQLException]]> </doc> </constructor> <method name="createKey" return="org.apache.hadoop.io.LongWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="createValue" return="T" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.LongWritable"/> <param name="value" type="T"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <doc> <![CDATA[A RecordReader that reads records from a SQL table. 
Emits LongWritables containing the record number as key and DBWritables as value.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader --> <!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable --> <class name="DBInputFormat.NullDBWritable" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.lib.db.DBWritable"/> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="DBInputFormat.NullDBWritable" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[A Class that does nothing, implementing DBWritable]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable --> <!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat --> <class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="Use org.apache.hadoop.mapreduce.lib.db.DBOutputFormat instead"> <implements name="org.apache.hadoop.mapred.OutputFormat"/> <constructor name="DBOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param 
name="filesystem" type="org.apache.hadoop.fs.FileSystem"/> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="name" type="java.lang.String"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="setOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="tableName" type="java.lang.String"/> <param name="fieldNames" type="java.lang.String[]"/> <doc> <![CDATA[Initializes the reduce-part of the job with the appropriate output settings @param job The job @param tableName The table to insert data into @param fieldNames The field names in the table.]]> </doc> </method> <method name="setOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> <param name="tableName" type="java.lang.String"/> <param name="fieldCount" type="int"/> <doc> <![CDATA[Initializes the reduce-part of the job with the appropriate output settings @param job The job @param tableName The table to insert data into @param fieldCount the number of fields in the table.]]> </doc> </method> <doc> <![CDATA[@deprecated Use org.apache.hadoop.mapreduce.lib.db.DBOutputFormat instead]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat --> <!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter --> <class name="DBOutputFormat.DBRecordWriter" extends="org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter" abstract="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.RecordWriter"/> <constructor name="DBOutputFormat.DBRecordWriter" 
type="java.sql.Connection, java.sql.PreparedStatement" static="false" final="false" visibility="protected" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> </constructor> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <doc> <![CDATA[A RecordWriter that writes the reduce output to a SQL table]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter --> <!-- start interface org.apache.hadoop.mapred.lib.db.DBWritable --> <interface name="DBWritable" abstract="true" static="false" final="false" visibility="public" deprecated="Use {@link org.apache.hadoop.mapreduce.lib.db.DBWritable} instead"> <implements name="org.apache.hadoop.mapreduce.lib.db.DBWritable"/> <doc> <![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.lib.db.DBWritable} instead]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapred.lib.db.DBWritable --> </package> <package name="org.apache.hadoop.mapred.pipes"> <!-- start class org.apache.hadoop.mapred.pipes.Submitter --> <class name="Submitter" extends="org.apache.hadoop.conf.Configured" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="Submitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="Submitter" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getExecutable" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not 
deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Get the URI of the application's executable. @param conf @return the URI where the application's executable is located]]> </doc> </method> <method name="setExecutable" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="executable" type="java.lang.String"/> <doc> <![CDATA[Set the URI for the application's executable. Normally this is a hdfs: location. @param conf @param executable The URI of the application's executable.]]> </doc> </method> <method name="setIsJavaRecordReader" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="value" type="boolean"/> <doc> <![CDATA[Set whether the job is using a Java RecordReader. @param conf the configuration to modify @param value the new value]]> </doc> </method> <method name="getIsJavaRecordReader" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Check whether the job is using a Java RecordReader @param conf the configuration to check @return is it a Java RecordReader?]]> </doc> </method> <method name="setIsJavaMapper" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="value" type="boolean"/> <doc> <![CDATA[Set whether the Mapper is written in Java. 
@param conf the configuration to modify @param value the new value]]> </doc> </method> <method name="getIsJavaMapper" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Check whether the job is using a Java Mapper. @param conf the configuration to check @return is it a Java Mapper?]]> </doc> </method> <method name="setIsJavaReducer" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="value" type="boolean"/> <doc> <![CDATA[Set whether the Reducer is written in Java. @param conf the configuration to modify @param value the new value]]> </doc> </method> <method name="getIsJavaReducer" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Check whether the job is using a Java Reducer. @param conf the configuration to check @return is it a Java Reducer?]]> </doc> </method> <method name="setIsJavaRecordWriter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="value" type="boolean"/> <doc> <![CDATA[Set whether the job will use a Java RecordWriter. @param conf the configuration to modify @param value the new value to set]]> </doc> </method> <method name="getIsJavaRecordWriter" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Will the reduce use a Java RecordWriter? 
@param conf the configuration to check @return true, if the output of the job will be written by Java]]> </doc> </method> <method name="getKeepCommandFile" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <doc> <![CDATA[Does the user want to keep the command file for debugging? If this is true, pipes will write a copy of the command data to a file in the task directory named "downlink.data", which may be used to run the C++ program under the debugger. You probably also want to set JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from being deleted. To run using the data file, set the environment variable "mapreduce.pipes.commandfile" to point to the file. @param conf the configuration to check @return will the framework save the command file?]]> </doc> </method> <method name="setKeepCommandFile" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <param name="keep" type="boolean"/> <doc> <![CDATA[Set whether to keep the command file for debugging @param conf the configuration to modify @param keep the new value]]> </doc> </method> <method name="submitJob" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="Use {@link Submitter#runJob(JobConf)}"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications to the job to run under pipes are made to the configuration. 
@param conf the job to submit to the cluster (MODIFIED) @throws IOException @deprecated Use {@link Submitter#runJob(JobConf)}]]> </doc> </method> <method name="runJob" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications to the job to run under pipes are made to the configuration. @param conf the job to submit to the cluster (MODIFIED) @throws IOException]]> </doc> </method> <method name="jobSubmit" return="org.apache.hadoop.mapred.RunningJob" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Submit a job to the Map-Reduce framework. This returns a handle to the {@link RunningJob} which can be used to track the running-job. @param conf the job configuration. @return a handle to the {@link RunningJob} which can be used to track the running-job. @throws IOException]]> </doc> </method> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> <doc> <![CDATA[Submit a pipes job based on the command line arguments. 
@param args]]> </doc> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="PRESERVE_COMMANDFILE" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="EXECUTABLE" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="INTERPRETOR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="IS_JAVA_MAP" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="IS_JAVA_RR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="IS_JAVA_RW" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="IS_JAVA_REDUCE" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="PARTITIONER" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="INPUT_FORMAT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="PORT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[The main entry point and job submitter. 
It may either be used as a command line-based or API-based method to launch Pipes jobs.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.pipes.Submitter --> </package> <package name="org.apache.hadoop.mapred.tools"> <!-- start class org.apache.hadoop.mapred.tools.MRAdmin --> <class name="MRAdmin" extends="org.apache.hadoop.conf.Configured" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="MRAdmin" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="MRAdmin" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="refreshSuperUserGroupsConfiguration" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[refreshSuperUserGroupsConfiguration {@link JobTracker}. @return exitcode 0 on success, non-zero on failure @throws IOException]]> </doc> </method> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <doc> <![CDATA[Administrative access to Hadoop Map-Reduce. 
Currently it only provides the ability to connect to the {@link JobTracker} and 1) refresh the service-level authorization policy, 2) refresh queue acl properties.]]> </doc> </class> <!-- end class org.apache.hadoop.mapred.tools.MRAdmin --> </package> <package name="org.apache.hadoop.mapreduce"> <!-- start class org.apache.hadoop.mapreduce.Cluster --> <class name="Cluster" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Cluster" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <constructor name="Cluster" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close the <code>Cluster</code>.]]> </doc> </method> <method name="getFileSystem" return="org.apache.hadoop.fs.FileSystem" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the file system where job-specific files are stored @return object of FileSystem @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getJob" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobId" 
type="org.apache.hadoop.mapreduce.JobID"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get job corresponding to jobid. @param jobId @return object of {@link Job} @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get all the queues in cluster. @return array of {@link QueueInfo} @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getQueue" return="org.apache.hadoop.mapreduce.QueueInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get queue information for the specified name. @param name queuename @return object of {@link QueueInfo} @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getClusterStatus" return="org.apache.hadoop.mapreduce.ClusterMetrics" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get current cluster status. 
@return object of {@link ClusterMetrics} @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getActiveTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get all active trackers in the cluster. @return array of {@link TaskTrackerInfo} @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getBlackListedTaskTrackers" return="org.apache.hadoop.mapreduce.TaskTrackerInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get blacklisted trackers. @return array of {@link TaskTrackerInfo} @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getAllJobs" return="org.apache.hadoop.mapreduce.Job[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get all the jobs in cluster. 
@return array of {@link Job} @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getSystemDir" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Grab the jobtracker system directory path where job-specific files will be placed. @return the system directory where job-specific files are to be placed.]]> </doc> </method> <method name="getStagingAreaDir" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Grab the jobtracker's view of the staging directory path where job-specific files will be placed. @return the staging directory where job-specific files are to be placed.]]> </doc> </method> <method name="getJobHistoryUrl" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobId" type="org.apache.hadoop.mapreduce.JobID"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the job history file path for a given job id. The job history file at this path may or may not exist, depending on the job completion state. The file is present only for completed jobs. @param jobId the JobID of the job submitted by the current user. 
@return the file path of the job history file @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getQueueAclsForCurrentUser" return="org.apache.hadoop.mapreduce.QueueAclsInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Gets the Queue ACLs for current user @return array of QueueAclsInfo object for current user. @throws IOException]]> </doc> </method> <method name="getRootQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Gets the root level queues. @return array of JobQueueInfo object. @throws IOException]]> </doc> </method> <method name="getChildQueues" return="org.apache.hadoop.mapreduce.QueueInfo[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Returns immediate children of queueName. 
@param queueName @return array of JobQueueInfo which are children of queueName @throws IOException]]> </doc> </method> <method name="getJobTrackerState" return="org.apache.hadoop.mapreduce.server.jobtracker.State" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Use {@link #getJobTrackerStatus()} instead."> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get JobTracker's state @return {@link State} of the JobTracker @throws IOException @throws InterruptedException @deprecated Use {@link #getJobTrackerStatus()} instead.]]> </doc> </method> <method name="getJobTrackerStatus" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the JobTracker's status. 
@return {@link JobTrackerStatus} of the JobTracker @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getTaskTrackerExpiryInterval" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the tasktracker expiry interval for the cluster @return the expiry interval in msec]]> </doc> </method> <method name="getDelegationToken" return="org.apache.hadoop.security.token.Token" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="renewer" type="org.apache.hadoop.io.Text"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get a delegation token for the user from the JobTracker. 
@param renewer the user who can renew the token @return the new token @throws IOException]]> </doc> </method> <method name="renewDelegationToken" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="token" type="org.apache.hadoop.security.token.Token"/> <exception name="SecretManager.InvalidToken" type="org.apache.hadoop.security.token.SecretManager.InvalidToken"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Renew a delegation token @param token the token to renew @return the new expiration time @throws InvalidToken @throws IOException]]> </doc> </method> <method name="cancelDelegationToken" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="token" type="org.apache.hadoop.security.token.Token"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Cancel a delegation token from the JobTracker @param token the token to cancel @throws IOException]]> </doc> </method> <doc> <![CDATA[Provides a way to access information about the map/reduce cluster.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Cluster --> <!-- start class org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus --> <class name="Cluster.JobTrackerStatus" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus" abstract="false" 
native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.Cluster.JobTrackerStatus --> <!-- start class org.apache.hadoop.mapreduce.ClusterMetrics --> <class name="ClusterMetrics" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="ClusterMetrics" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="ClusterMetrics" type="int, int, int, int, int, int, int, int, int, int, int, int" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRunningMaps" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of running map tasks in the cluster. @return running maps]]> </doc> </method> <method name="getRunningReduces" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of running reduce tasks in the cluster. @return running reduces]]> </doc> </method> <method name="getOccupiedMapSlots" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get number of occupied map slots in the cluster. @return occupied map slot count]]> </doc> </method> <method name="getOccupiedReduceSlots" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of occupied reduce slots in the cluster. 
@return occupied reduce slot count]]> </doc> </method> <method name="getReservedMapSlots" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get number of reserved map slots in the cluster. @return reserved map slot count]]> </doc> </method> <method name="getReservedReduceSlots" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of reserved reduce slots in the cluster. @return reserved reduce slot count]]> </doc> </method> <method name="getMapSlotCapacity" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the total number of map slots in the cluster. @return map slot capacity]]> </doc> </method> <method name="getReduceSlotCapacity" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the total number of reduce slots in the cluster. @return reduce slot capacity]]> </doc> </method> <method name="getTotalJobSubmissions" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the total number of job submissions in the cluster. @return total number of job submissions]]> </doc> </method> <method name="getTaskTrackerCount" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of active trackers in the cluster. 
@return active tracker count.]]> </doc> </method> <method name="getBlackListedTaskTrackerCount" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of blacklisted trackers in the cluster. @return blacklisted tracker count]]> </doc> </method> <method name="getDecommissionedTaskTrackerCount" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of decommissioned trackers in the cluster. @return decommissioned tracker count]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Status information on the current state of the Map-Reduce cluster. <p><code>ClusterMetrics</code> provides clients with information such as: <ol> <li> Size of the cluster. </li> <li> Number of blacklisted and decommissioned trackers. </li> <li> Slot capacity of the cluster. </li> <li> The number of currently occupied/reserved map & reduce slots. </li> <li> The number of currently running map & reduce tasks. </li> <li> The number of job submissions. 
</li> </ol></p> <p>Clients can query for the latest <code>ClusterMetrics</code>, via {@link Cluster#getClusterStatus()}.</p> @see Cluster]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.ClusterMetrics --> <!-- start class org.apache.hadoop.mapreduce.ContextFactory --> <class name="ContextFactory" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ContextFactory" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="cloneContext" return="org.apache.hadoop.mapreduce.JobContext" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="original" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Clone a job or task attempt context with a new configuration. @param original the original context @param conf the new configuration @return a new context object @throws InterruptedException @throws IOException]]> </doc> </method> <method name="cloneMapContext" return="org.apache.hadoop.mapreduce.Mapper.Context" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.MapContext"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="reader" type="org.apache.hadoop.mapreduce.RecordReader"/> <param name="writer" type="org.apache.hadoop.mapreduce.RecordWriter"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Copy a mapper context, optionally replacing the input and output. 
@param <K1> input key type @param <V1> input value type @param <K2> output key type @param <V2> output value type @param context the context to clone @param conf a new configuration @param reader Reader to read from. Null means to clone from context. @param writer Writer to write to. Null means to clone from context. @return a new context. it will not be the same class as the original. @throws IOException @throws InterruptedException]]> </doc> </method> <doc> <![CDATA[A factory to allow applications to deal with inconsistencies between MapReduce Context Objects API between hadoop-0.20 and later versions.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.ContextFactory --> <!-- start class org.apache.hadoop.mapreduce.Counter --> <class name="Counter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="Counter" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <constructor name="Counter" type="java.lang.String, java.lang.String" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <constructor name="Counter" type="java.lang.String, java.lang.String, long" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a counter. @param name the name within the group's enum. @param displayName a name to be displayed. 
@param value the counter value.]]> </doc> </constructor> <method name="setDisplayName" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="displayName" type="java.lang.String"/> </method> <method name="readFields" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read the binary representation of the counter]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Write the binary representation of the counter]]> </doc> </method> <method name="getName" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getDisplayName" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the display name of the counter. @return the user facing name of the counter]]> </doc> </method> <method name="getValue" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[What is the current value of this counter? 
@return the current value]]> </doc> </method> <method name="setValue" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="value" type="long"/> <doc> <![CDATA[Set this counter to the given value @param value the value to set]]> </doc> </method> <method name="increment" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="incr" type="long"/> <doc> <![CDATA[Increment this counter by the given value @param incr the value to increase this counter by]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericRight" type="java.lang.Object"/> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[A named counter that tracks the progress of a map/reduce job. <p><code>Counters</code> represent global counters, defined either by the Map-Reduce framework or applications. 
Each <code>Counter</code> is named by an {@link Enum} and has a long for the value.</p> <p><code>Counters</code> are bunched into Groups, each comprising counters from a particular <code>Enum</code> class.</p>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Counter --> <!-- start class org.apache.hadoop.mapreduce.CounterGroup --> <class name="CounterGroup" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <implements name="java.lang.Iterable"/> <constructor name="CounterGroup" type="java.lang.String" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <constructor name="CounterGroup" type="java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a CounterGroup. @param name the name of the group's enum. @param displayName a name to be displayed for the group.]]> </doc> </constructor> <method name="getName" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the internal name of the group @return the internal name]]> </doc> </method> <method name="getDisplayName" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the display name of the group. 
@return the human readable name]]> </doc> </method> <method name="addCounter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counter" type="org.apache.hadoop.mapreduce.Counter"/> <doc> <![CDATA[Add a counter to this group.]]> </doc> </method> <method name="findCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counterName" type="java.lang.String"/> <param name="displayName" type="java.lang.String"/> <doc> <![CDATA[Find a counter in a group. @param counterName the name of the counter @param displayName the display name of the counter @return the counter that was found or added]]> </doc> </method> <method name="findCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counterName" type="java.lang.String"/> </method> <method name="iterator" return="java.util.Iterator" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="size" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> 
<doc> <![CDATA[Returns the number of counters in this group.]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericRight" type="java.lang.Object"/> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="incrAllCounters" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="rightGroup" type="org.apache.hadoop.mapreduce.CounterGroup"/> </method> <doc> <![CDATA[A group of {@link Counter}s that logically belong together. Typically, it is an {@link Enum} subclass and the counters are the values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.CounterGroup --> <!-- start class org.apache.hadoop.mapreduce.Counters --> <class name="Counters" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <implements name="java.lang.Iterable"/> <constructor name="Counters" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="Counters" type="org.apache.hadoop.mapred.Counters" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Utility method to create a Counters object from the org.apache.hadoop.mapred counters @param counters]]> </doc> </constructor> <method name="addGroup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="group" type="org.apache.hadoop.mapreduce.CounterGroup"/> <doc> <![CDATA[Add a group.]]> </doc> </method> <method name="findCounter" 
return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="groupName" type="java.lang.String"/> <param name="counterName" type="java.lang.String"/> </method> <method name="findCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Enum"/> <doc> <![CDATA[Find the counter for the given enum. The same enum will always return the same counter. @param key the counter key @return the matching counter object]]> </doc> </method> <method name="getGroupNames" return="java.util.Collection" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the names of all counter classes. @return Set of counter names.]]> </doc> </method> <method name="iterator" return="java.util.Iterator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getGroup" return="org.apache.hadoop.mapreduce.CounterGroup" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="groupName" type="java.lang.String"/> <doc> <![CDATA[Returns the named counter group, or an empty group if there is none with the specified name.]]> </doc> </method> <method name="countCounters" return="int" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the total number of counters, by summing the number of counters in each group.]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" 
visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Write the set of groups. The external format is: #groups (groupName group)* i.e. the number of groups followed by 0 or more groups, where each group is of the form: groupDisplayName #counters (false | true counter)* where each counter is of the form: name (false | true displayName) value]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read a set of groups.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return textual representation of the counter values.]]> </doc> </method> <method name="incrAllCounters" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="org.apache.hadoop.mapreduce.Counters"/> <doc> <![CDATA[Increments multiple counters by their amounts in another Counters instance. 
@param other the other Counters instance]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericRight" type="java.lang.Object"/> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.mapreduce.Counters --> <!-- start class org.apache.hadoop.mapreduce.ID --> <class name="ID" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.WritableComparable"/> <constructor name="ID" type="int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[constructs an ID object from the given int]]> </doc> </constructor> <constructor name="ID" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="getId" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[returns the int which represents the identifier]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="java.lang.Object"/> </method> <method name="compareTo" return="int" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="that" type="org.apache.hadoop.mapreduce.ID"/> <doc> <![CDATA[Compare IDs by associated numbers]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="SEPARATOR" type="char" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="id" type="int" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[A general identifier, which internally stores the id as an integer. This is the super class of {@link JobID}, {@link TaskID} and {@link TaskAttemptID}. 
@see JobID @see TaskID @see TaskAttemptID]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.ID --> <!-- start class org.apache.hadoop.mapreduce.InputFormat --> <class name="InputFormat" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="InputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getSplits" return="java.util.List" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Logically split the set of input files for the job. <p>Each {@link InputSplit} is then assigned to an individual {@link Mapper} for processing.</p> <p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the input files are not physically split into chunks. For example, a split could be an <i><input-file-path, start, offset></i> tuple. The InputFormat also creates the {@link RecordReader} to read the {@link InputSplit}.</p> @param context job configuration. @return a list of {@link InputSplit}s for the job.]]> </doc> </method> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Create a record reader for a given split. 
The framework will call {@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before the split is used. @param split the split to be read @param context the information about the task @return a new record reader @throws IOException @throws InterruptedException]]> </doc> </method> <doc> <![CDATA[<code>InputFormat</code> describes the input-specification for a Map-Reduce job. <p>The Map-Reduce framework relies on the <code>InputFormat</code> of the job to:</p> <ol> <li> Validate the input-specification of the job. </li> <li> Split-up the input file(s) into logical {@link InputSplit}s, each of which is then assigned to an individual {@link Mapper}. </li> <li> Provide the {@link RecordReader} implementation to be used to glean input records from the logical <code>InputSplit</code> for processing by the {@link Mapper}. </li> </ol> <p>The default behavior of file-based {@link InputFormat}s, typically sub-classes of {@link FileInputFormat}, is to split the input into <i>logical</i> {@link InputSplit}s based on the total size, in bytes, of the input files. However, the {@link FileSystem} blocksize of the input files is treated as an upper bound for input splits. A lower bound on the split size can be set via <a href="{@docRoot}/../mapred-default.html#mapreduce.input.fileinputformat.split.minsize"> mapreduce.input.fileinputformat.split.minsize</a>.</p> <p>Clearly, logical splits based on input-size are insufficient for many applications since record boundaries are to be respected. In such cases, the application has to also implement a {@link RecordReader} on whom lies the responsibility to respect record-boundaries and present a record-oriented view of the logical <code>InputSplit</code> to the individual task.</p> 
@see InputSplit @see RecordReader @see FileInputFormat]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.InputFormat --> <!-- start class org.apache.hadoop.mapreduce.InputSplit --> <class name="InputSplit" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="InputSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getLength" return="long" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the size of the split, so that the input splits can be sorted by size. @return the number of bytes in the split @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getLocations" return="java.lang.String[]" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the list of nodes by name where the data for the split would be local. The locations do not need to be serialized. @return a new array of the node names. @throws IOException @throws InterruptedException]]> </doc> </method> <doc> <![CDATA[<code>InputSplit</code> represents the data to be processed by an individual {@link Mapper}. <p>Typically, it presents a byte-oriented view on the input and it is the responsibility of the {@link RecordReader} of the job to process this and present a record-oriented view.</p> 
@see InputFormat @see RecordReader]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.InputSplit --> <!-- start class org.apache.hadoop.mapreduce.Job --> <class name="Job" extends="org.apache.hadoop.mapreduce.task.JobContextImpl" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.JobContext"/> <constructor name="Job" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <constructor name="Job" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <constructor name="Job" type="org.apache.hadoop.conf.Configuration, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} . A Cluster will be created with a generic {@link Configuration}. @return the {@link Job} , with no connection to a cluster yet. @throws IOException]]> </doc> </method> <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} . 
A Cluster will be created from the conf parameter only when it's needed. @param conf the configuration @return the {@link Job} , with no connection to a cluster yet. @throws IOException]]> </doc> </method> <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="jobName" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Creates a new {@link Job} with no particular {@link Cluster} and a given jobName. A Cluster will be created from the conf parameter only when it's needed. @param conf the configuration @return the {@link Job} , with no connection to a cluster yet. @throws IOException]]> </doc> </method> <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="cluster" type="org.apache.hadoop.mapreduce.Cluster"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="cluster" type="org.apache.hadoop.mapreduce.Cluster"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getInstance" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="cluster" type="org.apache.hadoop.mapreduce.Cluster"/> <param name="status" type="org.apache.hadoop.mapreduce.JobStatus"/> <param name="conf" 
type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getStatus" return="org.apache.hadoop.mapreduce.JobStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getJobState" return="org.apache.hadoop.mapreduce.JobStatus.State" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Returns the current state of the Job. @return JobStatus#State @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getTrackingURL" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the URL where some job progress information will be displayed. @return the URL where some job progress information will be displayed.]]> </doc> </method> <method name="getJobFile" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the path of the submitted job configuration. @return the path of the submitted job configuration.]]> </doc> </method> <method name="getStartTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get start time of the job. 
@return the start time of the job]]> </doc> </method> <method name="getFinishTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get finish time of the job. @return the finish time of the job]]> </doc> </method> <method name="getSchedulingInfo" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get scheduling info of the job. @return the scheduling info of the job]]> </doc> </method> <method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the priority of the job. 
@return the priority of the job]]> </doc> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The user-specified job name.]]> </doc> </method> <method name="getHistoryUrl" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="isRetired" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Dump stats to screen.]]> </doc> </method> <method name="getTaskReports" return="org.apache.hadoop.mapreduce.TaskReport[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the information of the current state of the tasks of a job. @param type Type of the task @return an array of reports for all of the tasks of the given type. 
@throws IOException]]> </doc> </method> <method name="mapProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0 and 1.0. When all map tasks have completed, the function returns 1.0. @return the progress of the job's map-tasks. @throws IOException]]> </doc> </method> <method name="reduceProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0 and 1.0. When all reduce tasks have completed, the function returns 1.0. @return the progress of the job's reduce-tasks. @throws IOException]]> </doc> </method> <method name="cleanupProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0 and 1.0. When all cleanup tasks have completed, the function returns 1.0. @return the progress of the job's cleanup-tasks. 
@throws IOException]]> </doc> </method> <method name="setupProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0 and 1.0. When all setup tasks have completed, the function returns 1.0. @return the progress of the job's setup-tasks. @throws IOException]]> </doc> </method> <method name="isComplete" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Check if the job is finished or not. This is a non-blocking call. @return <code>true</code> if the job is complete, else <code>false</code>. @throws IOException]]> </doc> </method> <method name="isSuccessful" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Check if the job completed successfully. @return <code>true</code> if the job succeeded, else <code>false</code>. @throws IOException]]> </doc> </method> <method name="killJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Kill the running job. Blocks until all job tasks have been killed as well. 
If the job is no longer running, it simply returns. @throws IOException]]> </doc> </method> <method name="setPriority" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="priority" type="org.apache.hadoop.mapreduce.JobPriority"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Set the priority of a running job. @param priority the new priority for the job. @throws IOException]]> </doc> </method> <method name="getTaskCompletionEvents" return="org.apache.hadoop.mapreduce.TaskCompletionEvent[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="startFrom" type="int"/> <param name="numEvents" type="int"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get events indicating completion (success/failure) of component tasks. @param startFrom index to start fetching events from @param numEvents number of events to fetch @return an array of {@link TaskCompletionEvent}s @throws IOException]]> </doc> </method> <method name="killTask" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Kill indicated task attempt. @param taskId the id of the task to be terminated. 
@throws IOException]]> </doc> </method> <method name="failTask" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Fail indicated task attempt. @param taskId the id of the task to be terminated. @throws IOException]]> </doc> </method> <method name="getCounters" return="org.apache.hadoop.mapreduce.Counters" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Gets the counters for this job. May return null if the job has been retired and the job is no longer in the completed job store. @return the counters for this job. @throws IOException]]> </doc> </method> <method name="getTaskDiagnostics" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskid" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Gets the diagnostic messages for a given task attempt. 
@param taskid @return the list of diagnostic messages for the task @throws IOException]]> </doc> </method> <method name="setNumReduceTasks" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="tasks" type="int"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the number of reduce tasks for the job. @param tasks the number of reduce tasks @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setWorkingDirectory" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="dir" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Set the current working directory for the default file system. @param dir the new current working directory. @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setInputFormatClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the {@link InputFormat} for the job. @param cls the <code>InputFormat</code> to use @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setOutputFormatClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the {@link OutputFormat} for the job. 
@param cls the <code>OutputFormat</code> to use @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setMapperClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the {@link Mapper} for the job. @param cls the <code>Mapper</code> to use @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setJarByClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <doc> <![CDATA[Set the Jar by finding where a given class came from. @param cls the example class]]> </doc> </method> <method name="setJar" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jar" type="java.lang.String"/> <doc> <![CDATA[Set the job jar]]> </doc> </method> <method name="setUser" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="user" type="java.lang.String"/> <doc> <![CDATA[Set the reported username for this job. @param user the username for this job.]]> </doc> </method> <method name="setCombinerClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the combiner class for the job. 
@param cls the combiner to use @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setReducerClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the {@link Reducer} for the job. @param cls the <code>Reducer</code> to use @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setPartitionerClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the {@link Partitioner} for the job. @param cls the <code>Partitioner</code> to use @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setMapOutputKeyClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the key class for the map output data. This allows the user to specify the map output key class to be different than the final output key class. @param theClass the map output key class. @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setMapOutputValueClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the value class for the map output data. 
This allows the user to specify the map output value class to be different than the final output value class. @param theClass the map output value class. @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setOutputKeyClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the key class for the job output data. @param theClass the key class for the job output data. @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setOutputValueClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="theClass" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the value class for job outputs. @param theClass the value class for job outputs. @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setSortComparatorClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Define the comparator that controls how the keys are sorted before they are passed to the {@link Reducer}. 
@param cls the raw comparator @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setGroupingComparatorClass" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="cls" type="java.lang.Class"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Define the comparator that controls which keys are grouped together for a single call to {@link Reducer#reduce(Object, Iterable, org.apache.hadoop.mapreduce.Reducer.Context)} @param cls the raw comparator to use @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setJobName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> <exception name="IllegalStateException" type="java.lang.IllegalStateException"/> <doc> <![CDATA[Set the user-specified job name. @param name the job's new name. @throws IllegalStateException if the job is submitted]]> </doc> </method> <method name="setSpeculativeExecution" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="speculativeExecution" type="boolean"/> <doc> <![CDATA[Turn speculative execution on or off for this job. @param speculativeExecution <code>true</code> if speculative execution should be turned on, else <code>false</code>.]]> </doc> </method> <method name="setMapSpeculativeExecution" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="speculativeExecution" type="boolean"/> <doc> <![CDATA[Turn speculative execution on or off for this job for map tasks. 
@param speculativeExecution <code>true</code> if speculative execution should be turned on for map tasks, else <code>false</code>.]]> </doc> </method> <method name="setReduceSpeculativeExecution" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="speculativeExecution" type="boolean"/> <doc> <![CDATA[Turn speculative execution on or off for this job for reduce tasks. @param speculativeExecution <code>true</code> if speculative execution should be turned on for reduce tasks, else <code>false</code>.]]> </doc> </method> <method name="setJobSetupCleanupNeeded" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="needed" type="boolean"/> <doc> <![CDATA[Specify whether job-setup and job-cleanup is needed for the job @param needed If <code>true</code>, job-setup and job-cleanup will be considered from {@link OutputCommitter} else ignored.]]> </doc> </method> <method name="setCacheArchives" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="archives" type="java.net.URI[]"/> <doc> <![CDATA[Set the given set of archives @param archives The list of archives that need to be localized]]> </doc> </method> <method name="setCacheFiles" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="files" type="java.net.URI[]"/> <doc> <![CDATA[Set the given set of files @param files The list of files that need to be localized]]> </doc> </method> <method name="addCacheArchive" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="uri" type="java.net.URI"/> <doc> <![CDATA[Add an archive to be localized @param uri The uri of the cache to be 
localized]]> </doc> </method> <method name="addCacheFile" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="uri" type="java.net.URI"/> <doc> <![CDATA[Add a file to be localized @param uri The uri of the cache to be localized]]> </doc> </method> <method name="addFileToClassPath" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="file" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add a file path to the current set of classpath entries. It adds the file to cache as well. Files added with this method will not be unpacked while being added to the classpath. To add archives to classpath, use the {@link #addArchiveToClassPath(Path)} method instead. @param file Path of the file to be added]]> </doc> </method> <method name="addArchiveToClassPath" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="archive" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add an archive path to the current set of classpath entries. It adds the archive to cache as well. Archive files will be unpacked and added to the classpath when being distributed. 
@param archive Path of the archive to be added]]> </doc> </method> <method name="createSymlink" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[This method allows you to create symlinks in the current working directory of the task to all the cache files/archives]]> </doc> </method> <method name="setMaxMapAttempts" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="n" type="int"/> <doc> <![CDATA[Expert: Set the number of maximum attempts that will be made to run a map task. @param n the number of attempts per map task.]]> </doc> </method> <method name="setMaxReduceAttempts" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="n" type="int"/> <doc> <![CDATA[Expert: Set the number of maximum attempts that will be made to run a reduce task. @param n the number of attempts per reduce task.]]> </doc> </method> <method name="setProfileEnabled" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="newValue" type="boolean"/> <doc> <![CDATA[Set whether the system should collect profiler information for some of the tasks in this job? The information is stored in the user log directory. @param newValue true means it should be gathered]]> </doc> </method> <method name="setProfileParams" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="value" type="java.lang.String"/> <doc> <![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it will be replaced with the name of the profiling output file when the task runs. This value is passed to the task child JVM on the command line. 
@param value the configuration string]]> </doc> </method> <method name="setProfileTaskRange" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="isMap" type="boolean"/> <param name="newValue" type="java.lang.String"/> <doc> <![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true) must also be called. @param newValue a set of integer ranges of the map ids]]> </doc> </method> <method name="setCancelDelegationTokenUponJobCompletion" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="value" type="boolean"/> <doc> <![CDATA[Sets the flag that will allow the JobTracker to cancel the HDFS delegation tokens upon job completion. Defaults to true.]]> </doc> </method> <method name="submit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Submit the job to the cluster and return immediately. @throws IOException]]> </doc> </method> <method name="waitForCompletion" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="verbose" type="boolean"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Submit the job to the cluster and wait for it to finish. 
@param verbose print the progress to the user @return true if the job succeeded @throws IOException thrown if the communication with the <code>JobTracker</code> is lost]]> </doc> </method> <method name="monitorAndPrintJob" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Monitor a job and print status in real-time as progress is made and tasks fail. @return true if the job succeeded @throws IOException if communication to the JobTracker fails]]> </doc> </method> <method name="getProgressPollInterval" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[The interval at which monitorAndPrintJob() prints status]]> </doc> </method> <method name="getCompletionPollInterval" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[The interval at which waitForCompletion() should check.]]> </doc> </method> <method name="getTaskOutputFilter" return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the task output filter. @param conf the configuration. 
@return the filter level.]]> </doc> </method> <method name="setTaskOutputFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="newValue" type="org.apache.hadoop.mapreduce.Job.TaskStatusFilter"/> <doc> <![CDATA[Modify the Configuration to set the task output filter. @param conf the Configuration to modify. @param newValue the value to set.]]> </doc> </method> <field name="OUTPUT_FILTER" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="COMPLETION_POLL_INTERVAL_KEY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Key in mapred-*.xml that sets completionPollIntervalMillis]]> </doc> </field> <field name="PROGRESS_MONITOR_POLL_INTERVAL_KEY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Key in mapred-*.xml that sets progMonitorPollIntervalMillis]]> </doc> </field> <field name="USED_GENERIC_PARSER" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="SUBMIT_REPLICATION" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[The job submitter's view of the Job. <p>It allows the user to configure the job, submit it, control its execution, and query the state. The set methods only work until the job is submitted, afterwards they will throw an IllegalStateException. 
</p> <p> Normally the user creates the application, describes various facets of the job via {@link Job} and then submits the job and monitor its progress.</p> <p>Here is an example on how to submit a job:</p> <p><blockquote><pre> // Create a new Job Job job = new Job(new Configuration()); job.setJarByClass(MyJob.class); // Specify various job-specific parameters job.setJobName("myjob"); job.setInputPath(new Path("in")); job.setOutputPath(new Path("out")); job.setMapperClass(MyJob.MyMapper.class); job.setReducerClass(MyJob.MyReducer.class); // Submit the job, then poll for progress until the job is complete job.waitForCompletion(true); </pre></blockquote></p>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Job --> <!-- start class org.apache.hadoop.mapreduce.Job.JobState --> <class name="Job.JobState" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.Job.JobState[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.Job.JobState" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.Job.JobState --> <!-- start class org.apache.hadoop.mapreduce.Job.TaskStatusFilter --> <class name="Job.TaskStatusFilter" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" 
return="org.apache.hadoop.mapreduce.Job.TaskStatusFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.Job.TaskStatusFilter --> <!-- start interface org.apache.hadoop.mapreduce.JobContext --> <interface name="JobContext" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.MRJobConfig"/> <method name="getConfiguration" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the configuration for the job. @return the shared configuration object]]> </doc> </method> <method name="getCredentials" return="org.apache.hadoop.security.Credentials" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get credentials for the job. @return credentials for the job]]> </doc> </method> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the unique ID for the job. @return the object with the job id]]> </doc> </method> <method name="getNumReduceTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configured number of reduce tasks for this job. Defaults to <code>1</code>. 
@return the number of reduce tasks for this job.]]> </doc> </method> <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the current working directory for the default file system. @return the directory name.]]> </doc> </method> <method name="getOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the key class for the job output data. @return the key class for the job output data.]]> </doc> </method> <method name="getOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the value class for job outputs. @return the value class for job outputs.]]> </doc> </method> <method name="getMapOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the key class for the map output data. If it is not set, use the (final) output key class. This allows the map output key class to be different than the final output key class. @return the map output key class.]]> </doc> </method> <method name="getMapOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the value class for the map output data. If it is not set, use the (final) output value class This allows the map output value class to be different than the final output value class. 
@return the map output value class.]]> </doc> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user-specified job name. This is only used to identify the job to the user. @return the job's name, defaulting to "".]]> </doc> </method> <method name="getInputFormatClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Get the {@link InputFormat} class for the job. @return the {@link InputFormat} class for the job.]]> </doc> </method> <method name="getMapperClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Get the {@link Mapper} class for the job. @return the {@link Mapper} class for the job.]]> </doc> </method> <method name="getCombinerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Get the combiner class for the job. @return the combiner class for the job.]]> </doc> </method> <method name="getReducerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Get the {@link Reducer} class for the job. 
@return the {@link Reducer} class for the job.]]> </doc> </method> <method name="getOutputFormatClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Get the {@link OutputFormat} class for the job. @return the {@link OutputFormat} class for the job.]]> </doc> </method> <method name="getPartitionerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[Get the {@link Partitioner} class for the job. @return the {@link Partitioner} class for the job.]]> </doc> </method> <method name="getSortComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link RawComparator} comparator used to compare keys. @return the {@link RawComparator} comparator used to compare keys.]]> </doc> </method> <method name="getJar" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the pathname of the job's jar. @return the pathname]]> </doc> </method> <method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user defined {@link RawComparator} comparator for grouping keys of inputs to the reduce. @return comparator set by the user for grouping values. 
@see Job#setGroupingComparatorClass(Class) for details.]]> </doc> </method> <method name="getJobSetupCleanupNeeded" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get whether job-setup and job-cleanup is needed for the job @return boolean]]> </doc> </method> <method name="getProfileEnabled" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get whether the task profiling is enabled. @return true if some tasks will be profiled]]> </doc> </method> <method name="getProfileParams" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the profiler configuration arguments. The default value for this property is "-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s" @return the parameters to pass to the task child to configure profiling]]> </doc> </method> <method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="isMap" type="boolean"/> <doc> <![CDATA[Get the range of maps or reduces to profile. @param isMap is the task a map? @return the task ranges]]> </doc> </method> <method name="getUser" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the reported username for this job. 
@return the username]]> </doc> </method> <method name="getSymlink" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[This method checks to see if symlinks are to be created for the localized cache files in the current working directory @return true if symlinks are to be created, else false]]> </doc> </method> <method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the archive entries in classpath as an array of Path]]> </doc> </method> <method name="getCacheArchives" return="java.net.URI[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get cache archives set in the Configuration @return A URI array of the caches set in the Configuration @throws IOException]]> </doc> </method> <method name="getCacheFiles" return="java.net.URI[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get cache files set in the Configuration @return A URI array of the files set in the Configuration @throws IOException]]> </doc> </method> <method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return the path array of the localized caches @return A path array of localized caches @throws IOException]]> </doc> </method> <method name="getLocalCacheFiles" 
return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return the path array of the localized files @return A path array of localized files @throws IOException]]> </doc> </method> <method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the file entries in classpath as an array of Path]]> </doc> </method> <method name="getArchiveTimestamps" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the timestamps of the archives. Used by internal DistributedCache and MapReduce code. @return a string array of timestamps @throws IOException]]> </doc> </method> <method name="getFileTimestamps" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the timestamps of the files. Used by internal DistributedCache and MapReduce code. @return a string array of timestamps @throws IOException]]> </doc> </method> <method name="getMaxMapAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configured number of maximum attempts that will be made to run a map task, as specified by the <code>mapred.map.max.attempts</code> property. If this property is not already set, the default is 4 attempts. 
@return the max number of attempts per map task.]]> </doc> </method> <method name="getMaxReduceAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configured number of maximum attempts that will be made to run a reduce task, as specified by the <code>mapred.reduce.max.attempts</code> property. If this property is not already set, the default is 4 attempts. @return the max number of attempts per reduce task.]]> </doc> </method> <doc> <![CDATA[A read-only view of the job that is provided to the tasks while they are running.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.JobContext --> <!-- start class org.apache.hadoop.mapreduce.JobCounter --> <class name="JobCounter" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.JobCounter[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.JobCounter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.JobCounter --> <!-- start class org.apache.hadoop.mapreduce.JobID --> <class name="JobID" extends="org.apache.hadoop.mapred.ID" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.lang.Comparable"/> <constructor name="JobID" type="java.lang.String, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a JobID object @param jtIdentifier jobTracker identifier @param id job number]]> </doc> </constructor> 
<constructor name="JobID" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getJtIdentifier" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="java.lang.Object"/> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="org.apache.hadoop.mapreduce.ID"/> <doc> <![CDATA[Compare JobIds by first jtIdentifiers, then by job numbers]]> </doc> </method> <method name="appendTo" return="java.lang.StringBuilder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="builder" type="java.lang.StringBuilder"/> <doc> <![CDATA[Add the stuff after the "job" prefix to the given builder. This is useful, because the sub-ids use this substring at the start of their string. 
@param builder the builder to append to @return the builder that was passed in]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="forName" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="str" type="java.lang.String"/> <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> <doc> <![CDATA[Construct a JobId object from given string @return constructed JobId object or null if the given String is null @throws IllegalArgumentException if the given string is malformed]]> </doc> </method> <field name="JOB" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="JOBID_REGEX" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="idFormat" type="java.text.NumberFormat" transient="false" volatile="false" static="true" final="true" 
visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[JobID represents the immutable and unique identifier for the job. JobID consists of two parts. First part represents the jobtracker identifier, so that jobID to jobtracker map is defined. For cluster setup this string is the jobtracker start time, for local setting, it is "local". Second part of the JobID is the job number. <br> An example JobID is : <code>job_200707121733_0003</code> , which represents the third job running at the jobtracker started at <code>200707121733</code>. <p> Applications should never construct or parse JobID strings, but rather use appropriate constructors or {@link #forName(String)} method. @see TaskID @see TaskAttemptID @see org.apache.hadoop.mapred.JobTracker#getNewJobId() @see org.apache.hadoop.mapred.JobTracker#getStartTime()]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.JobID --> <!-- start class org.apache.hadoop.mapreduce.JobPriority --> <class name="JobPriority" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.JobPriority[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.JobPriority" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <doc> <![CDATA[Used to describe the priority of the running job.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.JobPriority --> <!-- start class org.apache.hadoop.mapreduce.JobStatus --> <class name="JobStatus" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> 
<implements name="java.lang.Cloneable"/> <constructor name="JobStatus" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="JobStatus" type="org.apache.hadoop.mapreduce.JobID, float, float, float, float, org.apache.hadoop.mapreduce.JobStatus.State, org.apache.hadoop.mapreduce.JobPriority, java.lang.String, java.lang.String, java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a job status object for a given jobid. @param jobid The jobid of the job @param setupProgress The progress made on the setup @param mapProgress The progress made on the maps @param reduceProgress The progress made on the reduces @param cleanupProgress The progress made on the cleanup @param runState The current state of the job @param jp Priority of the job. @param user userid of the person who submitted the job. @param jobName user-specified job name. @param jobFile job configuration file. 
@param trackingUrl link to the web-ui for details of the job.]]> </doc> </constructor> <method name="setMapProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the map progress of this job @param p The value of map progress to set to]]> </doc> </method> <method name="setCleanupProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the cleanup progress of this job @param p The value of cleanup progress to set to]]> </doc> </method> <method name="setSetupProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the setup progress of this job @param p The value of setup progress to set to]]> </doc> </method> <method name="setReduceProgress" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="p" type="float"/> <doc> <![CDATA[Sets the reduce progress of this Job @param p The value of reduce progress to set to]]> </doc> </method> <method name="setPriority" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="jp" type="org.apache.hadoop.mapreduce.JobPriority"/> <doc> <![CDATA[Set the priority of the job, defaulting to NORMAL. 
@param jp new job priority]]> </doc> </method> <method name="setFinishTime" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="finishTime" type="long"/> <doc> <![CDATA[Set the finish time of the job @param finishTime The finishTime of the job]]> </doc> </method> <method name="setHistoryFile" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="historyFile" type="java.lang.String"/> <doc> <![CDATA[Set the job history file url for a completed job]]> </doc> </method> <method name="setTrackingUrl" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="trackingUrl" type="java.lang.String"/> <doc> <![CDATA[Set the link to the web-ui for details of the job.]]> </doc> </method> <method name="setRetired" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Set the job retire flag to true.]]> </doc> </method> <method name="setState" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/> <doc> <![CDATA[Change the current run state of the job.]]> </doc> </method> <method name="setStartTime" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="startTime" type="long"/> <doc> <![CDATA[Set the start time of the job @param startTime The startTime of the job]]> </doc> </method> <method name="setUsername" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="userName" 
type="java.lang.String"/> <doc> <![CDATA[@param userName The username of the job]]> </doc> </method> <method name="setSchedulingInfo" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="schedulingInfo" type="java.lang.String"/> <doc> <![CDATA[Used to set the scheduling information associated to a particular Job. @param schedulingInfo Scheduling information of the job]]> </doc> </method> <method name="setJobACLs" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="acls" type="java.util.Map"/> <doc> <![CDATA[Set the job acls. @param acls {@link Map} from {@link JobACL} to {@link AccessControlList}]]> </doc> </method> <method name="getMapProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in maps]]> </doc> </method> <method name="getCleanupProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in cleanup]]> </doc> </method> <method name="getSetupProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in setup]]> </doc> </method> <method name="getReduceProgress" return="float" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Percentage of progress in reduce]]> </doc> </method> <method name="getState" return="org.apache.hadoop.mapreduce.JobStatus.State" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" 
deprecated="not deprecated"> <doc> <![CDATA[@return running state of the job]]> </doc> </method> <method name="getStartTime" return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return start time of the job]]> </doc> </method> <method name="clone" return="java.lang.Object" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return The jobid of the Job]]> </doc> </method> <method name="getUsername" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the username of the job]]> </doc> </method> <method name="getSchedulingInfo" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets the Scheduling information associated to a particular Job. @return the scheduling information of the job]]> </doc> </method> <method name="getJobACLs" return="java.util.Map" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the job acls. 
@return a {@link Map} from {@link JobACL} to {@link AccessControlList}]]> </doc> </method> <method name="getPriority" return="org.apache.hadoop.mapreduce.JobPriority" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the priority of the job @return job priority]]> </doc> </method> <method name="isJobComplete" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns true if the status is for a completed job.]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user-specified job name.]]> </doc> </method> <method name="getJobFile" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the configuration file for the job.]]> </doc> </method> <method name="getTrackingUrl" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the link to the web-ui for details of the job.]]> </doc> </method> <method name="getFinishTime" 
return="long" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the finish time of the job.]]> </doc> </method> <method name="isRetired" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Check whether the job has retired.]]> </doc> </method> <method name="getHistoryFile" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the job history file name for a completed job. If job is not completed or history file not available then return null.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Describes the current status of a job.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.JobStatus --> <!-- start class org.apache.hadoop.mapreduce.JobStatus.State --> <class name="JobStatus.State" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.JobStatus.State[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.JobStatus.State" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <method name="getValue" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> 
<![CDATA[Current state of the job]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.JobStatus.State --> <!-- start interface org.apache.hadoop.mapreduce.MapContext --> <interface name="MapContext" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> <method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the input split for this map.]]> </doc> </method> <doc> <![CDATA[The context that is given to the {@link Mapper}. @param <KEYIN> the key input type to the Mapper @param <VALUEIN> the value input type to the Mapper @param <KEYOUT> the key output type from the Mapper @param <VALUEOUT> the value output type from the Mapper]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.MapContext --> <!-- start class org.apache.hadoop.mapreduce.Mapper --> <class name="Mapper" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Mapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Called once at the beginning of the task.]]> </doc> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="key" type="KEYIN"/> <param name="value" type="VALUEIN"/> 
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Called once for each key/value pair in the input split. Most applications should override this, but the default is the identity function.]]> </doc> </method> <method name="cleanup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Called once at the end of the task.]]> </doc> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Expert users can override this method for more complete control over the execution of the Mapper. @param context @throws IOException]]> </doc> </method> <doc> <![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs. <p>Maps are the individual tasks which transform input records into intermediate records. The transformed intermediate records need not be of the same type as the input records. A given input pair may map to zero or many output pairs.</p> <p>The Hadoop Map-Reduce framework spawns one map task for each {@link InputSplit} generated by the {@link InputFormat} for the job. <code>Mapper</code> implementations can access the {@link Configuration} for the job via the {@link JobContext#getConfiguration()}.</p> 
<p>The framework first calls {@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by {@link #map(Object, Object, Context)} for each key/value pair in the <code>InputSplit</code>. Finally {@link #cleanup(Context)} is called.</p> <p>All intermediate values associated with a given output key are subsequently grouped by the framework, and passed to a {@link Reducer} to determine the final output. Users can control the sorting and grouping by specifying two key {@link RawComparator} classes.</p> <p>The <code>Mapper</code> outputs are partitioned per <code>Reducer</code>. Users can control which keys (and hence records) go to which <code>Reducer</code> by implementing a custom {@link Partitioner}.</p> <p>Users can optionally specify a <code>combiner</code>, via {@link Job#setCombinerClass(Class)}, to perform local aggregation of the intermediate outputs, which helps to cut down the amount of data transferred from the <code>Mapper</code> to the <code>Reducer</code>.</p> <p>Applications can specify if and how the intermediate outputs are to be compressed and which {@link CompressionCodec}s are to be used via the <code>Configuration</code>.</p> <p>If the job has zero reduces then the output of the <code>Mapper</code> is directly written to the {@link OutputFormat} without sorting by keys.</p> <p>Example:</p> <p><blockquote><pre> public class TokenCounterMapper extends Mapper&lt;Object, Text, Text, IntWritable&gt;{ private final static IntWritable one = new IntWritable(1); private Text word = new Text(); public void map(Object key, Text value, Context context) throws IOException, InterruptedException { StringTokenizer itr = new StringTokenizer(value.toString()); while (itr.hasMoreTokens()) { word.set(itr.nextToken()); context.write(word, one); } } } </pre></blockquote></p> <p>Applications may override the {@link #run(Context)} method to exert greater control on map processing e.g. 
multi-threaded <code>Mapper</code>s etc.</p> @see InputFormat @see JobContext @see Partitioner @see Reducer]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Mapper --> <!-- start class org.apache.hadoop.mapreduce.Mapper.Context --> <class name="Mapper.Context" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.MapContext"/> <constructor name="Mapper.Context" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[The <code>Context</code> passed on to the {@link Mapper} implementations.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Mapper.Context --> <!-- start class org.apache.hadoop.mapreduce.MarkableIterator --> <class name="MarkableIterator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.MarkableIteratorInterface"/> <constructor name="MarkableIterator" type="java.util.Iterator" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a new iterator layered on the input iterator @param itr underlying iterator that implements MarkableIteratorInterface]]> </doc> </constructor> <method name="mark" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clearMark" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" 
type="java.io.IOException"/> </method> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="VALUE" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="remove" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[<code>MarkableIterator</code> is a wrapper iterator class that implements the {@link MarkableIteratorInterface}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.MarkableIterator --> <!-- start class org.apache.hadoop.mapreduce.OutputCommitter --> <class name="OutputCommitter" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="OutputCommitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setupJob" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For the framework to setup the job output during initialization @param jobContext Context of the job whose output is being written. 
@throws IOException if temporary output could not be created]]> </doc> </method> <method name="cleanupJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Use {@link #commitJob(JobContext)} or {@link #abortJob(JobContext, JobStatus.State)} instead."> <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For cleaning up the job's output after job completion @param jobContext Context of the job whose output is being written. @throws IOException @deprecated Use {@link #commitJob(JobContext)} or {@link #abortJob(JobContext, JobStatus.State)} instead.]]> </doc> </method> <method name="commitJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For committing job's output after successful job completion. Note that this is invoked for jobs with final runstate as SUCCESSFUL. @param jobContext Context of the job whose output is being written. @throws IOException]]> </doc> </method> <method name="abortJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For aborting an unsuccessful job's output. Note that this is invoked for jobs with final runstate as {@link JobStatus.State#FAILED} or {@link JobStatus.State#KILLED}. @param jobContext Context of the job whose output is being written. 
@param state final runstate of the job @throws IOException]]> </doc> </method> <method name="setupTask" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Sets up output for the task. @param taskContext Context of the task whose output is being written. @throws IOException]]> </doc> </method> <method name="needsTaskCommit" return="boolean" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Check whether task needs a commit @param taskContext @return true/false @throws IOException]]> </doc> </method> <method name="commitTask" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[To promote the task's temporary output to final output location The task's output is moved to the job's output directory. @param taskContext Context of the task whose output is being written. 
@throws IOException if commit is not successful]]> </doc> </method> <method name="abortTask" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Discard the task output @param taskContext @throws IOException]]> </doc> </method> <doc> <![CDATA[<code>OutputCommitter</code> describes the commit of task output for a Map-Reduce job. <p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of the job to:<p> <ol> <li> Setup the job during initialization. For example, create the temporary output directory for the job during the initialization of the job. </li> <li> Cleanup the job after the job completion. For example, remove the temporary output directory after the job completion. </li> <li> Setup the task temporary output. </li> <li> Check whether a task needs a commit. This is to avoid the commit procedure if a task does not need commit. </li> <li> Commit of the task output. </li> <li> Discard the task commit. 
</li> </ol> @see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter @see JobContext @see TaskAttemptContext]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.OutputCommitter --> <!-- start class org.apache.hadoop.mapreduce.OutputFormat --> <class name="OutputFormat" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="OutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the {@link RecordWriter} for the given task. @param context the information about the current task. @return a {@link RecordWriter} to write the output for the job. @throws IOException]]> </doc> </method> <method name="checkOutputSpecs" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Check for validity of the output-specification for the job. <p>This is to validate the output specification for the job when a job is submitted. 
Typically checks that it does not already exist, throwing an exception when it already exists, so that output is not overwritten.</p> @param context information about the job @throws IOException when output should not be attempted]]> </doc> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the output committer for this output format. This is responsible for ensuring the output is committed correctly. @param context the task context @return an output committer @throws IOException @throws InterruptedException]]> </doc> </method> <doc> <![CDATA[<code>OutputFormat</code> describes the output-specification for a Map-Reduce job. <p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the job to:<p> <ol> <li> Validate the output-specification of the job. For e.g. check that the output directory doesn't already exist. <li> Provide the {@link RecordWriter} implementation to be used to write out the output files of the job. Output files are stored in a {@link FileSystem}. 
</li> </ol> @see RecordWriter]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.OutputFormat --> <!-- start class org.apache.hadoop.mapreduce.Partitioner --> <class name="Partitioner" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Partitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getPartition" return="int" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="KEY"/> <param name="value" type="VALUE"/> <param name="numPartitions" type="int"/> <doc> <![CDATA[Get the partition number for a given key (hence record) given the total number of partitions i.e. number of reduce-tasks for the job. <p>Typically a hash function on all or a subset of the key.</p> @param key the key to be partitioned. @param value the entry value. @param numPartitions the total number of partitions. @return the partition number for the <code>key</code>.]]> </doc> </method> <doc> <![CDATA[Partitions the key space. <p><code>Partitioner</code> controls the partitioning of the keys of the intermediate map-outputs. The key (or a subset of the key) is used to derive the partition, typically by a hash function. The total number of partitions is the same as the number of reduce tasks for the job. 
Hence this controls which of the <code>m</code> reduce tasks the intermediate key (and hence the record) is sent for reduction.</p> @see Reducer]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Partitioner --> <!-- start class org.apache.hadoop.mapreduce.QueueAclsInfo --> <class name="QueueAclsInfo" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="QueueAclsInfo" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default constructor for QueueAclsInfo.]]> </doc> </constructor> <constructor name="QueueAclsInfo" type="java.lang.String, java.lang.String[]" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a new QueueAclsInfo object using the queue name and the queue operations array @param queueName Name of the job queue @param operations]]> </doc> </constructor> <method name="getQueueName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get queue name. @return name]]> </doc> </method> <method name="setQueueName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> </method> <method name="getOperations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get operations allowed on queue. 
@return array of String]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Class to encapsulate Queue ACLs for a particular user.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.QueueAclsInfo --> <!-- start class org.apache.hadoop.mapreduce.QueueInfo --> <class name="QueueInfo" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="QueueInfo" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default constructor for QueueInfo.]]> </doc> </constructor> <constructor name="QueueInfo" type="java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a new QueueInfo object using the queue name and the scheduling information passed. 
@param queueName Name of the job queue @param schedulingInfo Scheduling Information associated with the job queue]]> </doc> </constructor> <constructor name="QueueInfo" type="java.lang.String, java.lang.String, org.apache.hadoop.mapreduce.QueueState, org.apache.hadoop.mapreduce.JobStatus[]" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param queueName @param schedulingInfo @param state @param stats]]> </doc> </constructor> <method name="setQueueName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="queueName" type="java.lang.String"/> <doc> <![CDATA[Set the queue name of the JobQueueInfo @param queueName Name of the job queue.]]> </doc> </method> <method name="getQueueName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the queue name from JobQueueInfo @return queue name]]> </doc> </method> <method name="setSchedulingInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="schedulingInfo" type="java.lang.String"/> <doc> <![CDATA[Set the scheduling information associated to particular job queue @param schedulingInfo]]> </doc> </method> <method name="getSchedulingInfo" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets the scheduling information associated to particular job queue. 
If nothing is set would return <b>"N/A"</b> @return Scheduling information associated to particular Job Queue]]> </doc> </method> <method name="setState" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="state" type="org.apache.hadoop.mapreduce.QueueState"/> <doc> <![CDATA[Set the state of the queue @param state state of the queue.]]> </doc> </method> <method name="getState" return="org.apache.hadoop.mapreduce.QueueState" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the queue state @return the queue state.]]> </doc> </method> <method name="setJobStatuses" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="stats" type="org.apache.hadoop.mapreduce.JobStatus[]"/> </method> <method name="getQueueChildren" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get immediate children. @return list of QueueInfo]]> </doc> </method> <method name="setQueueChildren" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="children" type="java.util.List"/> </method> <method name="getProperties" return="java.util.Properties" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get properties. 
@return Properties]]> </doc> </method> <method name="setProperties" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="props" type="java.util.Properties"/> </method> <method name="getJobStatuses" return="org.apache.hadoop.mapreduce.JobStatus[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the jobs submitted to queue @return list of JobStatus for the submitted jobs]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Class that contains the information regarding the Job Queues which are maintained by the Hadoop Map/Reduce framework.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.QueueInfo --> <!-- start class org.apache.hadoop.mapreduce.QueueState --> <class name="QueueState" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.QueueState[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.QueueState" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> 
</method> <method name="getStateName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the stateName]]> </doc> </method> <method name="getState" return="org.apache.hadoop.mapreduce.QueueState" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="state" type="java.lang.String"/> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Enum representing queue state]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.QueueState --> <!-- start class org.apache.hadoop.mapreduce.RecordReader --> <class name="RecordReader" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.io.Closeable"/> <constructor name="RecordReader" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="initialize" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Called once at initialization. 
@param split the split that defines the range of records to read @param context the information about the task @throws IOException @throws InterruptedException]]> </doc> </method> <method name="nextKeyValue" return="boolean" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Read the next key, value pair. @return true if a key/value pair was read @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getCurrentKey" return="KEYIN" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the current key @return the current key or null if there is no current key @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getCurrentValue" return="VALUEIN" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the current value. @return the object that was read @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getProgress" return="float" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[The current progress of the record reader through its data. 
@return a number between 0.0 and 1.0 that is the fraction of the data read @throws IOException @throws InterruptedException]]> </doc> </method> <method name="close" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close the record reader.]]> </doc> </method> <doc> <![CDATA[The record reader breaks the data into key/value pairs for input to the {@link Mapper}. @param <KEYIN> @param <VALUEIN>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.RecordReader --> <!-- start class org.apache.hadoop.mapreduce.RecordWriter --> <class name="RecordWriter" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="RecordWriter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="write" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Writes a key/value pair. @param key the key to write. @param value the value to write. @throws IOException]]> </doc> </method> <method name="close" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Close this <code>RecordWriter</code> to future operations. 
@param context the context of the task @throws IOException]]> </doc> </method> <doc> <![CDATA[<code>RecordWriter</code> writes the output <key, value> pairs to an output file. <p><code>RecordWriter</code> implementations write the job outputs to the {@link FileSystem}. @see OutputFormat]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.RecordWriter --> <!-- start interface org.apache.hadoop.mapreduce.ReduceContext --> <interface name="ReduceContext" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> <method name="nextKey" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Start processing next unique key.]]> </doc> </method> <method name="getValues" return="java.lang.Iterable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Iterate through the values for the current key, reusing the same value object, which is stored in the context. @return the series of values associated with the current key. All of the objects returned directly and indirectly from this method are reused.]]> </doc> </method> <doc> <![CDATA[The context passed to the {@link Reducer}. 
@param <KEYIN> the class of the input keys @param <VALUEIN> the class of the input values @param <KEYOUT> the class of the output keys @param <VALUEOUT> the class of the output values]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.ReduceContext --> <!-- start interface org.apache.hadoop.mapreduce.ReduceContext.ValueIterator --> <interface name="ReduceContext.ValueIterator" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.MarkableIteratorInterface"/> <method name="resetBackupStore" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This method is called when the reducer moves from one key to another. @throws IOException]]> </doc> </method> <doc> <![CDATA[{@link Iterator} to iterate over values for a given group of records.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.ReduceContext.ValueIterator --> <!-- start class org.apache.hadoop.mapreduce.Reducer --> <class name="Reducer" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Reducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Called once at the start of the task.]]> </doc> </method> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" 
deprecated="not deprecated"> <param name="key" type="KEYIN"/> <param name="values" type="java.lang.Iterable"/> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[This method is called once for each key. Most applications will define their reduce class by overriding this method. The default implementation is an identity function.]]> </doc> </method> <method name="cleanup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Called once at the end of the task.]]> </doc> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Advanced application writers can use the {@link #run(org.apache.hadoop.mapreduce.Reducer.Context)} method to control how the reduce task works.]]> </doc> </method> <doc> <![CDATA[Reduces a set of intermediate values which share a key to a smaller set of values. 
<p><code>Reducer</code> implementations can access the {@link Configuration} for the job via the {@link JobContext#getConfiguration()} method.</p> <p><code>Reducer</code> has 3 primary phases:</p> <ol> <li> <h4 id="Shuffle">Shuffle</h4> <p>The <code>Reducer</code> copies the sorted output from each {@link Mapper} using HTTP across the network.</p> </li> <li> <h4 id="Sort">Sort</h4> <p>The framework merge sorts <code>Reducer</code> inputs by <code>key</code>s (since different <code>Mapper</code>s may have output the same key).</p> <p>The shuffle and sort phases occur simultaneously i.e. while outputs are being fetched they are merged.</p> <h5 id="SecondarySort">SecondarySort</h5> <p>To achieve a secondary sort on the values returned by the value iterator, the application should extend the key with the secondary key and define a grouping comparator. The keys will be sorted using the entire key, but will be grouped using the grouping comparator to decide which keys and values are sent in the same call to reduce.The grouping comparator is specified via {@link Job#setGroupingComparatorClass(Class)}. The sort order is controlled by {@link Job#setSortComparatorClass(Class)}.</p> For example, say that you want to find duplicate web pages and tag them all with the url of the "best" known example. 
You would set up the job like: <ul> <li>Map Input Key: url</li> <li>Map Input Value: document</li> <li>Map Output Key: document checksum, url pagerank</li> <li>Map Output Value: url</li> <li>Partitioner: by checksum</li> <li>OutputKeyComparator: by checksum and then decreasing pagerank</li> <li>OutputValueGroupingComparator: by checksum</li> </ul> </li> <li> <h4 id="Reduce">Reduce</h4> <p>In this phase the {@link #reduce(Object, Iterable, Context)} method is called for each <code><key, (collection of values)></code> in the sorted inputs.</p> <p>The output of the reduce task is typically written to a {@link RecordWriter} via {@link Context#write(Object, Object)}.</p> </li> </ol> <p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p> <p>Example:</p> <p><blockquote><pre> public class IntSumReducer<Key> extends Reducer<Key,IntWritable, Key,IntWritable> { private IntWritable result = new IntWritable(); public void reduce(Key key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException { int sum = 0; for (IntWritable val : values) { sum += val.get(); } result.set(sum); context.write(key, result); } } </pre></blockquote></p> @see Mapper @see Partitioner]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Reducer --> <!-- start class org.apache.hadoop.mapreduce.Reducer.Context --> <class name="Reducer.Context" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.ReduceContext"/> <constructor name="Reducer.Context" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[The <code>Context</code> passed on to the {@link Reducer} implementations.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.Reducer.Context --> <!-- start interface org.apache.hadoop.mapreduce.TaskAttemptContext --> <interface name="TaskAttemptContext" abstract="true" 
static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.JobContext"/> <implements name="org.apache.hadoop.util.Progressable"/> <method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the unique name for this task attempt.]]> </doc> </method> <method name="setStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="msg" type="java.lang.String"/> <doc> <![CDATA[Set the current status of the task to the given string.]]> </doc> </method> <method name="getStatus" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the last set status message. @return the current status message]]> </doc> </method> <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counterName" type="java.lang.Enum"/> <doc> <![CDATA[Get the {@link Counter} for the given <code>counterName</code>. @param counterName counter name @return the <code>Counter</code> for the given <code>counterName</code>]]> </doc> </method> <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="groupName" type="java.lang.String"/> <param name="counterName" type="java.lang.String"/> <doc> <![CDATA[Get the {@link Counter} for the given <code>groupName</code> and <code>counterName</code>. 
@param groupName counter group name @param counterName counter name @return the <code>Counter</code> for the given <code>groupName</code> and <code>counterName</code>]]> </doc> </method> <doc> <![CDATA[The context for task attempts.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.TaskAttemptContext --> <!-- start class org.apache.hadoop.mapreduce.TaskAttemptID --> <class name="TaskAttemptID" extends="org.apache.hadoop.mapred.ID" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskAttemptID" type="org.apache.hadoop.mapreduce.TaskID, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}. @param taskId TaskID that this task belongs to @param id the task attempt number]]> </doc> </constructor> <constructor name="TaskAttemptID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskAttemptID object from given parts. 
@param jtIdentifier jobTracker identifier @param jobId job number @param type the TaskType @param taskId taskId number @param id the task attempt number]]> </doc> </constructor> <constructor name="TaskAttemptID" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the {@link JobID} object that this task attempt belongs to]]> </doc> </method> <method name="getTaskID" return="org.apache.hadoop.mapreduce.TaskID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the {@link TaskID} object that this task attempt belongs to]]> </doc> </method> <method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the TaskType of the TaskAttemptID]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="java.lang.Object"/> </method> <method name="appendTo" return="java.lang.StringBuilder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="builder" type="java.lang.StringBuilder"/> <doc> <![CDATA[Add the unique string to the StringBuilder @param builder the builder to append to @return the builder that was passed in.]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" 
type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="org.apache.hadoop.mapreduce.ID"/> <doc> <![CDATA[Compare TaskIds by first tipIds, then by task numbers.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="forName" return="org.apache.hadoop.mapreduce.TaskAttemptID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="str" type="java.lang.String"/> <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> <doc> <![CDATA[Construct a TaskAttemptID object from given string @return constructed TaskAttemptID object or null if the given String is null @throws IllegalArgumentException if the given string is malformed]]> </doc> </method> <field name="ATTEMPT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[TaskAttemptID represents the immutable and unique identifier for a task attempt. Each task attempt is one particular instance of a Map or Reduce Task identified by its TaskID. 
TaskAttemptID consists of 2 parts. First part is the {@link TaskID}, that this TaskAttemptID belongs to. Second part is the task attempt number. <br> An example TaskAttemptID is : <code>attempt_200707121733_0003_m_000005_0</code> , which represents the zeroth task attempt for the fifth map task in the third job running at the jobtracker started at <code>200707121733</code>. <p> Applications should never construct or parse TaskAttemptID strings , but rather use appropriate constructors or {@link #forName(String)} method. @see JobID @see TaskID]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.TaskAttemptID --> <!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent --> <class name="TaskCompletionEvent" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="TaskCompletionEvent" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default constructor for Writable.]]> </doc> </constructor> <constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapreduce.TaskAttemptID, int, boolean, org.apache.hadoop.mapreduce.TaskCompletionEvent.Status, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor. eventId should be created externally and incremented per event for each job. @param eventId event id, event id should be unique and assigned in incrementally, starting from 0. @param taskId task id @param status task's status @param taskTrackerHttp task tracker's host:port for http.]]> </doc> </constructor> <method name="getEventId" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns event Id. 
@return event id]]> </doc> </method> <method name="getTaskAttemptId" return="org.apache.hadoop.mapreduce.TaskAttemptID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns task attempt id. @return task attempt id]]> </doc> </method> <method name="getStatus" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the status of the task as a {@link Status} value, e.g. success or failure. @return task status]]> </doc> </method> <method name="getTaskTrackerHttp" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[http location of the tasktracker where this task ran. @return http location of tasktracker user logs]]> </doc> </method> <method name="getTaskRunTime" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns time (in millisec) the task took to complete.]]> </doc> </method> <method name="setTaskRunTime" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="taskCompletionTime" type="int"/> <doc> <![CDATA[Set the task completion time @param taskCompletionTime time (in millisec) the task took to complete]]> </doc> </method> <method name="setEventId" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="eventId" type="int"/> <doc> <![CDATA[set event Id. should be assigned incrementally starting from 0. 
@param eventId]]> </doc> </method> <method name="setTaskAttemptId" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> <doc> <![CDATA[Sets task id. @param taskId]]> </doc> </method> <method name="setTaskStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="status" type="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status"/> <doc> <![CDATA[Set task status. @param status]]> </doc> </method> <method name="setTaskTrackerHttp" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="taskTrackerHttp" type="java.lang.String"/> <doc> <![CDATA[Set task tracker http location. @param taskTrackerHttp]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="java.lang.Object"/> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="isMapTask" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="idWithinJob" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="write" abstract="false" native="false" synchronized="false" 
static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="EMPTY_ARRAY" type="org.apache.hadoop.mapreduce.TaskCompletionEvent[]" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This is used to track task completion events on job tracker.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent --> <!-- start class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status --> <class name="TaskCompletionEvent.Status" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCompletionEvent.Status" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.TaskCompletionEvent.Status --> <!-- start class org.apache.hadoop.mapreduce.TaskCounter --> <class name="TaskCounter" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.TaskCounter[]" abstract="false" native="false" synchronized="false" static="true" 
final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.TaskCounter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.TaskCounter --> <!-- start class org.apache.hadoop.mapreduce.TaskID --> <class name="TaskID" extends="org.apache.hadoop.mapred.ID" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, org.apache.hadoop.mapreduce.TaskType, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskID object from given {@link JobID}. @param jobId JobID that this tip belongs to @param type the {@link TaskType} of the task @param id the tip number]]> </doc> </constructor> <constructor name="TaskID" type="java.lang.String, int, org.apache.hadoop.mapreduce.TaskType, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a TaskInProgressId object from given parts. 
@param jtIdentifier jobTracker identifier @param jobId job number @param type the TaskType @param id the tip number]]> </doc> </constructor> <constructor name="TaskID" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the {@link JobID} object that this tip belongs to]]> </doc> </method> <method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the type of the task]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="java.lang.Object"/> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="org.apache.hadoop.mapreduce.ID"/> <doc> <![CDATA[Compare TaskInProgressIds by first jobIds, then by tip numbers. Reduces are defined as greater than maps.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="appendTo" return="java.lang.StringBuilder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="builder" type="java.lang.StringBuilder"/> <doc> <![CDATA[Add the unique string to the given builder. 
@param builder the builder to append to @return the builder that was passed in]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="forName" return="org.apache.hadoop.mapreduce.TaskID" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="str" type="java.lang.String"/> <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> <doc> <![CDATA[Construct a TaskID object from given string @return constructed TaskID object or null if the given String is null @throws IllegalArgumentException if the given string is malformed]]> </doc> </method> <method name="getRepresentingCharacter" return="char" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="type" type="org.apache.hadoop.mapreduce.TaskType"/> <doc> <![CDATA[Gets the character representing the {@link TaskType} @param type the TaskType @return the character]]> </doc> </method> <method name="getTaskType" return="org.apache.hadoop.mapreduce.TaskType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="c" type="char"/> <doc> <![CDATA[Gets the 
{@link TaskType} corresponding to the character @param c the character @return the TaskType]]> </doc> </method> <method name="getAllTaskTypes" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <field name="TASK" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="idFormat" type="java.text.NumberFormat" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[TaskID represents the immutable and unique identifier for a Map or Reduce Task. Each TaskID encompasses multiple attempts made to execute the Map or Reduce Task, each of which is uniquely identified by its TaskAttemptID. TaskID consists of 3 parts. First part is the {@link JobID}, that this TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r' representing whether the task is a map task or a reduce task. And the third part is the task number. <br> An example TaskID is : <code>task_200707121733_0003_m_000005</code> , which represents the fifth map task in the third job running at the jobtracker started at <code>200707121733</code>. <p> Applications should never construct or parse TaskID strings , but rather use appropriate constructors or {@link #forName(String)} method. 
@see JobID @see TaskAttemptID]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.TaskID --> <!-- start interface org.apache.hadoop.mapreduce.TaskInputOutputContext --> <interface name="TaskInputOutputContext" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Advance to the next key, value pair, returning false if at end. @return true if a key/value pair was read, false if no more]]> </doc> </method> <method name="getCurrentKey" return="KEYIN" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the current key. @return the current key object or null if there isn't one @throws IOException @throws InterruptedException]]> </doc> </method> <method name="getCurrentValue" return="VALUEIN" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the current value. 
@return the value object that was read into @throws IOException @throws InterruptedException]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="KEYOUT"/> <param name="value" type="VALUEOUT"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Generate an output key/value pair.]]> </doc> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link OutputCommitter} for the task-attempt. @return the <code>OutputCommitter</code> for the task-attempt]]> </doc> </method> <doc> <![CDATA[A context object that allows input and output from the task. It is only supplied to the {@link Mapper} or {@link Reducer}. 
@param <KEYIN> the input key type for the task @param <VALUEIN> the input value type for the task @param <KEYOUT> the output key type for the task @param <VALUEOUT> the output value type for the task]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.TaskInputOutputContext --> <!-- start class org.apache.hadoop.mapreduce.TaskTrackerInfo --> <class name="TaskTrackerInfo" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="TaskTrackerInfo" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="TaskTrackerInfo" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="TaskTrackerInfo" type="java.lang.String, java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getTaskTrackerName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets the tasktracker's name. @return tracker's name.]]> </doc> </method> <method name="isBlacklisted" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Whether tracker is blacklisted @return true if tracker is blacklisted false otherwise]]> </doc> </method> <method name="getReasonForBlacklist" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets the reason for which the tasktracker was blacklisted. 
@return reason which tracker was blacklisted]]> </doc> </method> <method name="getBlacklistReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Gets a descriptive report about why the tasktracker was blacklisted. @return report describing why the tasktracker was blacklisted.]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Information about TaskTracker.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.TaskTrackerInfo --> <!-- start class org.apache.hadoop.mapreduce.TaskType --> <class name="TaskType" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.TaskType[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.TaskType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <doc> <![CDATA[Enum for map, reduce, job-setup, job-cleanup, task-cleanup task types.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.TaskType --> </package> <package name="org.apache.hadoop.mapreduce.lib.aggregate"> <!-- start class 
org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum --> <class name="DoubleValueSum" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="DoubleValueSum" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The default constructor]]> </doc> </constructor> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val an object whose string representation represents a double value.]]> </doc> </method> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="double"/> <doc> <![CDATA[add a value to the aggregator @param val a double value.]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of the aggregated value]]> </doc> </method> <method name="getSum" return="double" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the aggregated value]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not 
deprecated"> <doc> <![CDATA[@return an array of one element. The element is a string representation of the aggregated value. The return value is expected to be used by a combiner.]]> </doc> </method> <doc> <![CDATA[This class implements a value aggregator that sums up a sequence of double values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.DoubleValueSum --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax --> <class name="LongValueMax" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="LongValueMax" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[the default constructor]]> </doc> </constructor> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val an object whose string representation represents a long value.]]> </doc> </method> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="newVal" type="long"/> <doc> <![CDATA[add a value to the aggregator @param newVal a long value.]]> </doc> </method> <method name="getVal" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the aggregated value]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of the aggregated 
value]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return an array of one element. The element is a string representation of the aggregated value. The return value is expected to be used by a combiner.]]> </doc> </method> <doc> <![CDATA[This class implements a value aggregator that maintains the maximum of a sequence of long values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMax --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin --> <class name="LongValueMin" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="LongValueMin" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[the default constructor]]> </doc> </constructor> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val an object whose string representation represents a long value.]]> </doc> </method> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="newVal" type="long"/> <doc> <![CDATA[add a value to the aggregator @param newVal a long value.]]> </doc> </method> <method name="getVal" return="long" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the aggregated value]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of the aggregated value]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return an array of one element. The element is a string representation of the aggregated value. The return value is expected to be used by a combiner.]]> </doc> </method> <doc> <![CDATA[This class implements a value aggregator that maintains the minimum of a sequence of long values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueMin --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum --> <class name="LongValueSum" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="LongValueSum" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[the default constructor]]> </doc> </constructor> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val an 
object whose string representation represents a long value.]]> </doc> </method> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="long"/> <doc> <![CDATA[add a value to the aggregator @param val a long value.]]> </doc> </method> <method name="getSum" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the aggregated value]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of the aggregated value]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return return an array of one element. The element is a string representation of the aggregated value. 
The return value is expected to be used by a combiner.]]> </doc> </method> <doc> <![CDATA[This class implements a value aggregator that sums up a sequence of long values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.LongValueSum --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax --> <class name="StringValueMax" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="StringValueMax" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[the default constructor]]> </doc> </constructor> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val a string.]]> </doc> </method> <method name="getVal" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the aggregated value]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of the aggregated value]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return return an array 
of one element. The element is a string representation of the aggregated value. The return value is expected to be used by a combiner.]]> </doc> </method> <doc> <![CDATA[This class implements a value aggregator that maintains the biggest of a sequence of strings.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMax --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin --> <class name="StringValueMin" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="StringValueMin" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[the default constructor]]> </doc> </constructor> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val a string.]]> </doc> </method> <method name="getVal" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the aggregated value]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of the aggregated value]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" 
final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return return an array of one element. The element is a string representation of the aggregated value. The return value is expected to be used by a combiner.]]> </doc> </method> <doc> <![CDATA[This class implements a value aggregator that maintains the smallest of a sequence of strings.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.StringValueMin --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount --> <class name="UniqValueCount" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="UniqValueCount" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[the default constructor]]> </doc> </constructor> <constructor name="UniqValueCount" type="long" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[constructor @param maxNum the limit in the number of unique values to keep.]]> </doc> </constructor> <method name="setMaxItems" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="n" type="long"/> <doc> <![CDATA[Set the limit on the number of unique values @param n the desired limit on the number of unique values @return the new limit on the number of unique values]]> </doc> </method> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val an object.]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" 
final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return return the number of unique objects aggregated]]> </doc> </method> <method name="getUniqueItems" return="java.util.Set" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the set of the unique objects]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return return an array of the unique objects. The return value is expected to be used by a combiner.]]> </doc> </method> <field name="MAX_NUM_UNIQUE_VALUES" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UniqValueCount --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor --> <class name="UserDefinedValueAggregatorDescriptor" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/> <constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param className the class name of the user defined descriptor class @param conf a configure object used 
for descriptor configuration]]> </doc> </constructor> <method name="createInstance" return="java.lang.Object" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="className" type="java.lang.String"/> <doc> <![CDATA[Create an instance of the given class @param className the name of the class @return a dynamically created instance of the given class]]> </doc> </method> <method name="generateKeyValPairs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[Generate a list of aggregation-id/value pairs for the given key/value pairs by delegating the invocation to the real object. @param key input key @param val input value @return a list of aggregation id/value pairs. An aggregation id encodes an aggregation type which is used to guide the way to aggregate the value in the reduce/combiner phase of an Aggregate based job.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of this object.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Do nothing.]]> </doc> </method> <field name="theAggregatorDescriptor" type="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements a wrapper for a user defined value 
aggregator descriptor. It serves two functions: One is to create an object of ValueAggregatorDescriptor from the name of a user defined class that may be dynamically loaded. The other is to delegate invocations of generateKeyValPairs function to the created object.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.UserDefinedValueAggregatorDescriptor --> <!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator --> <interface name="ValueAggregator" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add a value to the aggregator @param val the value to be added]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of the aggregator]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return an array of values as the outputs of the combiner.]]> </doc> </method> <doc> <![CDATA[This interface defines the minimal protocol for value aggregators.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor --> <class name="ValueAggregatorBaseDescriptor" 
extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor"/> <constructor name="ValueAggregatorBaseDescriptor" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="generateEntry" return="java.util.Map.Entry" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="type" type="java.lang.String"/> <param name="id" type="java.lang.String"/> <param name="val" type="org.apache.hadoop.io.Text"/> <doc> <![CDATA[@param type the aggregation type @param id the aggregation id @param val the val associated with the id to be aggregated @return an Entry whose key is the aggregation id prefixed with the aggregation type.]]> </doc> </method> <method name="generateValueAggregator" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="type" type="java.lang.String"/> <param name="uniqCount" type="long"/> <doc> <![CDATA[@param type the aggregation type @param uniqCount the limit in the number of unique values to keep, if type is UNIQ_VALUE_COUNT @return a value aggregator of the given type.]]> </doc> </method> <method name="generateKeyValPairs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[Generate 1 or 2 aggregation-id/value pairs for the given key/value pair. The first id will be of type LONG_VALUE_SUM, with "record_count" as its aggregation id. 
If the input is a file split, the second id of the same type will be generated too, with the file name as its aggregation id. This achieves the behavior of counting the total number of records in the input data, and the number of records in each input file. @param key input key @param val input value @return a list of aggregation id/value pairs. An aggregation id encodes an aggregation type which is used to guide the way to aggregate the value in the reduce/combiner phase of an Aggregate based job.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[get the input file name. @param conf a configuration object]]> </doc> </method> <field name="UNIQ_VALUE_COUNT" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="LONG_VALUE_SUM" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="DOUBLE_VALUE_SUM" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="VALUE_HISTOGRAM" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="LONG_VALUE_MAX" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="LONG_VALUE_MIN" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="STRING_VALUE_MAX" type="java.lang.String" transient="false" volatile="false" static="true" 
final="true" visibility="public" deprecated="not deprecated"> </field> <field name="STRING_VALUE_MIN" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="inputFile" type="java.lang.String" transient="false" volatile="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements the common functionalities of the subclasses of ValueAggregatorDescriptor class.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorBaseDescriptor --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner --> <class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapreduce.Reducer" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ValueAggregatorCombiner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="values" type="java.lang.Iterable"/> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Combines values for a given key. @param key the key is expected to be a Text object, whose prefix indicates the type of aggregation to aggregate the values. 
@param values the values to combine @param context to collect combined values]]> </doc> </method> <doc> <![CDATA[This class implements the generic combiner of Aggregate.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorCombiner --> <!-- start interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor --> <interface name="ValueAggregatorDescriptor" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="generateKeyValPairs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[Generate a list of aggregation-id/value pairs for the given key/value pair. This function is usually called by the mapper of an Aggregate based job. @param key input key @param val input value @return a list of aggregation id/value pairs. 
An aggregation id encodes an aggregation type which is used to guide the way to aggregate the value in the reduce/combiner phase of an Aggregate based job.]]> </doc> </method> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Configure the object @param conf a Configuration object that may contain the information that can be used to configure the object.]]> </doc> </method> <field name="TYPE_SEPARATOR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="ONE" type="org.apache.hadoop.io.Text" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This interface defines the contract a value aggregator descriptor must support. Such a descriptor can be configured with a {@link Configuration} object. Its main function is to generate a list of aggregation-id/value pairs. An aggregation id encodes an aggregation type which is used to guide the way to aggregate the value in the reduce/combiner phase of an Aggregate based job. The mapper in an Aggregate based map/reduce job may create one or more of ValueAggregatorDescriptor objects at configuration time. 
For each input key/value pair, the mapper will use those objects to create aggregation id/value pairs.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob --> <class name="ValueAggregatorJob" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ValueAggregatorJob" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <param name="descriptors" type="java.lang.Class[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createValueAggregatorJobs" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create an Aggregate based map/reduce job. @param conf The configuration for job @param args the arguments used for job creation. Generic hadoop arguments are accepted. @return a Job object ready for submission. 
@throws IOException @see GenericOptionsParser]]> </doc> </method> <method name="createValueAggregatorJob" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <param name="descriptors" type="java.lang.Class[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setAggregatorDescriptors" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="descriptors" type="java.lang.Class[]"/> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <doc> <![CDATA[create and run an Aggregate based map/reduce job. @param args the arguments used for job creation @throws IOException]]> </doc> </method> <doc> <![CDATA[This is the main class for creating a map/reduce job using Aggregate framework. The Aggregate is a specialization of map/reduce framework, specializing for performing various simple aggregations. Generally speaking, in order to implement an application using Map/Reduce model, the developer is to implement Map and Reduce functions (and possibly combine function). However, a lot of applications related to counting and statistics computing have very similar characteristics. Aggregate abstracts out the general patterns of these functions and implementing those patterns. 
In particular, the package provides generic mapper/reducer/combiner classes, and a set of built-in value aggregators, and a generic utility class that helps user create map/reduce jobs using the generic class. The built-in aggregators include: sum over numeric values count the number of distinct values compute the histogram of values compute the minimum, maximum, median, average, standard deviation of numeric values The developer using Aggregate will need only to provide a plugin class conforming to the following interface: public interface ValueAggregatorDescriptor { public ArrayList<Entry> generateKeyValPairs(Object key, Object value); public void configure(Configuration conf); } The package also provides a base class, ValueAggregatorBaseDescriptor, implementing the above interface. The user can extend the base class and implement generateKeyValPairs accordingly. The primary work of generateKeyValPairs is to emit one or more key/value pairs based on the input key/value pair. The key in an output key/value pair encodes two pieces of information: aggregation type and aggregation id. The value will be aggregated onto the aggregation id according to the aggregation type. This class offers a function to generate a map/reduce job using Aggregate framework. 
The function takes the following parameters: input directory spec input format (text or sequence file) output directory a file specifying the user plugin class]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJob --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase --> <class name="ValueAggregatorJobBase" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ValueAggregatorJobBase" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getValueAggregatorDescriptor" return="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorDescriptor" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <param name="spec" type="java.lang.String"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getAggregatorDescriptors" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="logSpec" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> </method> <field name="DESCRIPTOR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="DESCRIPTOR_NUM" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" 
deprecated="not deprecated"> </field> <field name="USER_JAR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="aggregatorDescriptorList" type="java.util.ArrayList" transient="false" volatile="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[This abstract class implements some common functionalities of the generic mapper, reducer and combiner classes of Aggregate.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorJobBase --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper --> <class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ValueAggregatorMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K1"/> <param name="value" type="V1"/> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[the map function. 
It iterates through the value aggregator descriptor list to generate aggregation id/value pairs and emit them.]]> </doc> </method> <doc> <![CDATA[This class implements the generic mapper of Aggregate.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer --> <class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapreduce.Reducer" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ValueAggregatorReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="values" type="java.lang.Iterable"/> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[@param key the key is expected to be a Text object, whose prefix indicates the type of aggregation to aggregate the values. In effect, data driven computing is achieved. It is assumed that each aggregator's getReport method emits appropriate output for the aggregator. This may be further customized. 
@param values the values to be aggregated @param context]]> </doc> </method> <doc> <![CDATA[This class implements the generic reducer of Aggregate.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregatorReducer --> <!-- start class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram --> <class name="ValueHistogram" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.aggregate.ValueAggregator"/> <constructor name="ValueHistogram" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="addNextValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="java.lang.Object"/> <doc> <![CDATA[add the given val to the aggregator. @param val the value to be added. It is expected to be a string in the form of xxxx\tnum, meaning xxxx has num occurrences.]]> </doc> </method> <method name="getReport" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the string representation of this aggregator. 
It includes the following basic statistics of the histogram: the number of unique values the minimum value the median value the maximum value the average value the standard deviation]]> </doc> </method> <method name="getReportDetails" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return a string representation of the list of value/frequency pairs of the histogram]]> </doc> </method> <method name="getCombinerOutput" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return a list of value/frequency pairs. The return value is expected to be used by the reducer.]]> </doc> </method> <method name="getReportItems" return="java.util.TreeMap" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return a TreeMap representation of the histogram]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[reset the aggregator]]> </doc> </method> <doc> <![CDATA[This class implements a value aggregator that computes the histogram of a sequence of strings.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.aggregate.ValueHistogram --> </package> <package name="org.apache.hadoop.mapreduce.lib.chain"> <!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainMapper --> <class name="ChainMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ChainMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="addMapper" abstract="false" 
native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="klass" type="java.lang.Class"/> <param name="inputKeyClass" type="java.lang.Class"/> <param name="inputValueClass" type="java.lang.Class"/> <param name="outputKeyClass" type="java.lang.Class"/> <param name="outputValueClass" type="java.lang.Class"/> <param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Adds a {@link Mapper} class to the chain mapper. <p> The key and values are passed from one element of the chain to the next, by value. For the added Mapper the configuration given for it, <code>mapperConf</code>, have precedence over the job's Configuration. This precedence is in effect when the task is running. </p> <p> IMPORTANT: There is no need to specify the output key/value classes for the ChainMapper, this is done by the addMapper for the last mapper in the chain </p> @param job The job. @param klass the Mapper class to add. @param inputKeyClass mapper input key class. @param inputValueClass mapper input value class. @param outputKeyClass mapper output key class. @param outputValueClass mapper output value class. @param mapperConf a configuration for the Mapper class. 
It is recommended to use a Configuration without default values using the <code>Configuration(boolean loadDefaults)</code> constructor with FALSE.]]> </doc> </method> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <doc> <![CDATA[The ChainMapper class allows to use multiple Mapper classes within a single Map task. <p> The Mapper classes are invoked in a chained (or piped) fashion, the output of the first becomes the input of the second, and so on until the last Mapper, the output of the last Mapper will be written to the task's output. </p> <p> The key functionality of this feature is that the Mappers in the chain do not need to be aware that they are executed in a chain. This enables having reusable specialized Mappers that can be combined to perform composite operations within a single task. </p> <p> Special care has to be taken when creating chains that the key/values output by a Mapper are valid for the following Mapper in the chain. It is assumed all Mappers and the Reduce in the chain use matching output and input key and value classes as no conversion is done by the chaining code. </p> <p> Using the ChainMapper and the ChainReducer classes it is possible to compose Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An immediate benefit of this pattern is a dramatic reduction in disk IO. 
</p> <p> IMPORTANT: There is no need to specify the output key/value classes for the ChainMapper, this is done by the addMapper for the last mapper in the chain. </p> ChainMapper usage pattern: <p/> <pre> ... Job job = new Job(conf); <p/> Configuration mapAConf = new Configuration(false); ... ChainMapper.addMapper(job, AMap.class, LongWritable.class, Text.class, Text.class, Text.class, mapAConf); <p/> Configuration mapBConf = new Configuration(false); ... ChainMapper.addMapper(job, BMap.class, Text.class, Text.class, LongWritable.class, Text.class, mapBConf); <p/> ... <p/> job.waitForCompletion(true); ... </pre>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.chain.ChainReducer --> <class name="ChainReducer" extends="org.apache.hadoop.mapreduce.Reducer" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ChainReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setReducer" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="klass" type="java.lang.Class"/> <param name="inputKeyClass" type="java.lang.Class"/> <param name="inputValueClass" type="java.lang.Class"/> <param name="outputKeyClass" type="java.lang.Class"/> <param name="outputValueClass" type="java.lang.Class"/> <param name="reducerConf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Sets the {@link Reducer} class to the chain job. <p> The key and values are passed from one element of the chain to the next, by value. For the added Reducer the configuration given for it, <code>reducerConf</code>, have precedence over the job's Configuration. This precedence is in effect when the task is running. 
</p> <p> IMPORTANT: There is no need to specify the output key/value classes for the ChainReducer, this is done by the setReducer or the addMapper for the last element in the chain. </p> @param job the job @param klass the Reducer class to add. @param inputKeyClass reducer input key class. @param inputValueClass reducer input value class. @param outputKeyClass reducer output key class. @param outputValueClass reducer output value class. @param reducerConf a configuration for the Reducer class. It is recommended to use a Configuration without default values using the <code>Configuration(boolean loadDefaults)</code> constructor with FALSE.]]> </doc> </method> <method name="addMapper" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="klass" type="java.lang.Class"/> <param name="inputKeyClass" type="java.lang.Class"/> <param name="inputValueClass" type="java.lang.Class"/> <param name="outputKeyClass" type="java.lang.Class"/> <param name="outputValueClass" type="java.lang.Class"/> <param name="mapperConf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Adds a {@link Mapper} class to the chain reducer. <p> The key and values are passed from one element of the chain to the next, by value For the added Mapper the configuration given for it, <code>mapperConf</code>, have precedence over the job's Configuration. This precedence is in effect when the task is running. </p> <p> IMPORTANT: There is no need to specify the output key/value classes for the ChainMapper, this is done by the addMapper for the last mapper in the chain. </p> @param job The job. @param klass the Mapper class to add. @param inputKeyClass mapper input key class. @param inputValueClass mapper input value class. @param outputKeyClass mapper output key class. 
@param outputValueClass mapper output value class. @param mapperConf a configuration for the Mapper class. It is recommended to use a Configuration without default values using the <code>Configuration(boolean loadDefaults)</code> constructor with FALSE.]]> </doc> </method> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <doc> <![CDATA[The ChainReducer class allows to chain multiple Mapper classes after a Reducer within the Reducer task. <p> For each record output by the Reducer, the Mapper classes are invoked in a chained (or piped) fashion. The output of the reducer becomes the input of the first mapper and output of first becomes the input of the second, and so on until the last Mapper, the output of the last Mapper will be written to the task's output. </p> <p> The key functionality of this feature is that the Mappers in the chain do not need to be aware that they are executed after the Reducer or in a chain. This enables having reusable specialized Mappers that can be combined to perform composite operations within a single task. </p> <p> Special care has to be taken when creating chains that the key/values output by a Mapper are valid for the following Mapper in the chain. It is assumed all Mappers and the Reduce in the chain use matching output and input key and value classes as no conversion is done by the chaining code. 
</p> <p> Using the ChainMapper and the ChainReducer classes it is possible to compose Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An immediate benefit of this pattern is a dramatic reduction in disk IO. </p> <p> IMPORTANT: There is no need to specify the output key/value classes for the ChainReducer, this is done by the setReducer or the addMapper for the last element in the chain. </p> ChainReducer usage pattern: <p/> <pre> ... Job job = new Job(conf); .... <p/> Configuration reduceConf = new Configuration(false); ... ChainReducer.setReducer(job, XReduce.class, LongWritable.class, Text.class, Text.class, Text.class, reduceConf); <p/> ChainReducer.addMapper(job, CMap.class, Text.class, Text.class, LongWritable.class, Text.class, null); <p/> ChainReducer.addMapper(job, DMap.class, LongWritable.class, Text.class, LongWritable.class, LongWritable.class, null); <p/> ... <p/> job.waitForCompletion(true); ... </pre>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.chain.ChainReducer --> </package> <package name="org.apache.hadoop.mapreduce.lib.db"> <!-- start class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter --> <class name="BigDecimalSplitter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> <constructor name="BigDecimalSplitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="split" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="results" type="java.sql.ResultSet"/> <param name="colName" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <method name="tryDivide" 
return="java.math.BigDecimal" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="numerator" type="java.math.BigDecimal"/> <param name="denominator" type="java.math.BigDecimal"/> <doc> <![CDATA[Divide numerator by denominator. If impossible in exact mode, use rounding.]]> </doc> </method> <doc> <![CDATA[Implement DBSplitter over BigDecimal values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter --> <!-- start class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter --> <class name="BooleanSplitter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> <constructor name="BooleanSplitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="split" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="results" type="java.sql.ResultSet"/> <param name="colName" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <doc> <![CDATA[Implement DBSplitter over boolean values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.BooleanSplitter --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat --> <class name="DataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="DataDrivenDBInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method 
name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="sqlDataType" type="int"/> <doc> <![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]> </doc> </method> <method name="getSplits" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getBoundingValsQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[@return a query which returns the minimum and maximum values for the order-by column. The min value should be in the first column, and the max value should be in the second column of the results.]]> </doc> </method> <method name="setBoundingQuery" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="query" type="java.lang.String"/> <doc> <![CDATA[Set the user-defined bounding query to use with a user-defined query. This *must* include the substring "$CONDITIONS" (DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) inside the WHERE clause, so that DataDrivenDBInputFormat knows where to insert split clauses. 
e.g., "SELECT foo FROM mytable WHERE $CONDITIONS" This will be expanded to something like: SELECT foo FROM mytable WHERE (id > 100) AND (id < 250) inside each split.]]> </doc> </method> <method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setInput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="inputClass" type="java.lang.Class"/> <param name="tableName" type="java.lang.String"/> <param name="conditions" type="java.lang.String"/> <param name="splitBy" type="java.lang.String"/> <param name="fieldNames" type="java.lang.String[]"/> <doc> <![CDATA[Note that the "orderBy" column is called the "splitBy" in this version. 
We reuse the same field, but it's not strictly ordering it -- just partitioning the results.]]> </doc> </method> <method name="setInput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="inputClass" type="java.lang.Class"/> <param name="inputQuery" type="java.lang.String"/> <param name="inputBoundingQuery" type="java.lang.String"/> <doc> <![CDATA[setInput() takes a custom query and a separate "bounding query" to use instead of the custom "count query" used by DBInputFormat.]]> </doc> </method> <field name="SUBSTITUTE_TOKEN" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[If users are providing their own query, the following string is expected to appear in the WHERE clause, which will be substituted with a pair of conditions on the input to allow input splits to parallelise the import.]]> </doc> </field> <doc> <![CDATA[A InputFormat that reads input data from an SQL table. 
Operates like DBInputFormat, but instead of using LIMIT and OFFSET to demarcate splits, it tries to generate WHERE clauses which separate the data into roughly equivalent shards.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit --> <class name="DataDrivenDBInputFormat.DataDrivenDBInputSplit" extends="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DataDrivenDBInputFormat.DataDrivenDBInputSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default Constructor]]> </doc> </constructor> <constructor name="DataDrivenDBInputFormat.DataDrivenDBInputSplit" type="java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Convenience Constructor @param lower the string to be put in the WHERE clause to guard on the 'lower' end @param upper the string to be put in the WHERE clause to guard on the 'upper' end]]> </doc> </constructor> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@return The total row count in this split]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="input" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> 
<param name="output" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getLowerClause" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getUpperClause" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[A InputSplit that spans a set of rows]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat.DataDrivenDBInputSplit --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader --> <class name="DataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[@param split The InputSplit to read data for @throws SQLException]]> </doc> </constructor> <method name="getSelectQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Returns the query for selecting the records, subclasses can override this for custom behaviour.]]> </doc> </method> <doc> <![CDATA[A RecordReader that reads records from a SQL table, using data-driven WHERE clause splits. 
Emits LongWritables containing the record number as key and DBWritables as value.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DateSplitter --> <class name="DateSplitter" extends="org.apache.hadoop.mapreduce.lib.db.IntegerSplitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DateSplitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="split" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="results" type="java.sql.ResultSet"/> <param name="colName" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <method name="dateToString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="d" type="java.util.Date"/> <doc> <![CDATA[Given a Date 'd', format it as a string for use in a SQL date comparison operation. @param d the date to format. @return the string representing this date in SQL with any appropriate quotation characters, etc.]]> </doc> </method> <doc> <![CDATA[Implement DBSplitter over date/time values. 
Make use of logic from IntegerSplitter, since date/time are just longs in Java.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DateSplitter --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DBConfiguration --> <class name="DBConfiguration" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DBConfiguration" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configureDB" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="driverClass" type="java.lang.String"/> <param name="dbUrl" type="java.lang.String"/> <param name="userName" type="java.lang.String"/> <param name="passwd" type="java.lang.String"/> <doc> <![CDATA[Sets the DB access related fields in the {@link Configuration}. @param conf the configuration @param driverClass JDBC Driver class name @param dbUrl JDBC DB access URL. @param userName DB access username @param passwd DB access passwd]]> </doc> </method> <method name="configureDB" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.conf.Configuration"/> <param name="driverClass" type="java.lang.String"/> <param name="dbUrl" type="java.lang.String"/> <doc> <![CDATA[Sets the DB access related fields in the JobConf. 
@param job the job @param driverClass JDBC Driver class name @param dbUrl JDBC DB access URL.]]> </doc> </method> <method name="getConnection" return="java.sql.Connection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[Returns a connection object to the DB @throws ClassNotFoundException @throws SQLException]]> </doc> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getInputTableName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setInputTableName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="tableName" type="java.lang.String"/> </method> <method name="getInputFieldNames" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setInputFieldNames" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="fieldNames" type="java.lang.String[]"/> </method> <method name="getInputConditions" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setInputConditions" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not 
deprecated"> <param name="conditions" type="java.lang.String"/> </method> <method name="getInputOrderBy" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setInputOrderBy" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="orderby" type="java.lang.String"/> </method> <method name="getInputQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setInputQuery" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="query" type="java.lang.String"/> </method> <method name="getInputCountQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setInputCountQuery" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="query" type="java.lang.String"/> </method> <method name="setInputBoundingQuery" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="query" type="java.lang.String"/> </method> <method name="getInputBoundingQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getInputClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setInputClass" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="inputClass" type="java.lang.Class"/> </method> <method name="getOutputTableName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setOutputTableName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="tableName" type="java.lang.String"/> </method> <method name="getOutputFieldNames" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setOutputFieldNames" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="fieldNames" type="java.lang.String[]"/> </method> <method name="setOutputFieldCount" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="fieldCount" type="int"/> </method> <method name="getOutputFieldCount" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <field name="DRIVER_CLASS_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The JDBC Driver class name]]> </doc> </field> <field name="URL_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[JDBC Database access URL]]> </doc> </field> <field name="USERNAME_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" 
final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[User name to access the database]]> </doc> </field> <field name="PASSWORD_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Password to access the database]]> </doc> </field> <field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Input table name]]> </doc> </field> <field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Field names in the Input table]]> </doc> </field> <field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[WHERE clause in the input SELECT statement]]> </doc> </field> <field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[ORDER BY clause in the input SELECT statement]]> </doc> </field> <field name="INPUT_QUERY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Whole input query, excluding LIMIT...OFFSET]]> </doc> </field> <field name="INPUT_COUNT_QUERY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Input query to get the count of records]]> </doc> </field> <field name="INPUT_BOUNDING_QUERY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> 
<![CDATA[Input query to get the max and min values of the jdbc.input.query]]> </doc> </field> <field name="INPUT_CLASS_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Class name implementing DBWritable which will hold input tuples]]> </doc> </field> <field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Output table name]]> </doc> </field> <field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Field names in the Output table]]> </doc> </field> <field name="OUTPUT_FIELD_COUNT_PROPERTY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Number of fields in the Output table]]> </doc> </field> <doc> <![CDATA[A container for configuration property names for jobs with DB input/output. The job can be configured using the static methods in this class, {@link DBInputFormat}, and {@link DBOutputFormat}. Alternatively, the properties can be set in the configuration with proper values. @see DBConfiguration#configureDB(Configuration, String, String, String, String) @see DBInputFormat#setInput(Job, Class, String, String) @see DBInputFormat#setInput(Job, Class, String, String, String, String...) 
@see DBOutputFormat#setOutput(Job, String, String...)]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DBConfiguration --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat --> <class name="DBInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="DBInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getConnection" return="java.sql.Connection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getDBProductName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/> <param 
name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getSplits" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getCountQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Returns the query for getting the total number of rows, subclasses can override this for custom behaviour.]]> </doc> </method> <method name="setInput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="inputClass" type="java.lang.Class"/> <param name="tableName" type="java.lang.String"/> <param name="conditions" type="java.lang.String"/> <param name="orderBy" type="java.lang.String"/> <param name="fieldNames" type="java.lang.String[]"/> <doc> <![CDATA[Initializes the map-part of the job with the appropriate input settings. 
@param job The map-reduce job @param inputClass the class object implementing DBWritable, which is the Java object holding tuple fields. @param tableName The table to read data from @param conditions The condition which to select data with, eg. '(updated > 20070101 AND length > 0)' @param orderBy the fieldNames in the orderBy clause. @param fieldNames The field names in the table @see #setInput(Job, Class, String, String)]]> </doc> </method> <method name="setInput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="inputClass" type="java.lang.Class"/> <param name="inputQuery" type="java.lang.String"/> <param name="inputCountQuery" type="java.lang.String"/> <doc> <![CDATA[Initializes the map-part of the job with the appropriate input settings. @param job The map-reduce job @param inputClass the class object implementing DBWritable, which is the Java object holding tuple fields. @param inputQuery the input query to select fields. Example : "SELECT f1, f2, f3 FROM Mytable ORDER BY f1" @param inputCountQuery the input query that returns the number of records in the table. Example : "SELECT COUNT(f1) FROM Mytable" @see #setInput(Job, Class, String, String, String, String...)]]> </doc> </method> <method name="closeConnection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <doc> <![CDATA[An InputFormat that reads input data from an SQL table. <p> DBInputFormat emits LongWritables containing the record number as key and DBWritables as value. 
The SQL query and input class can be set using one of the two setInput methods.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit --> <class name="DBInputFormat.DBInputSplit" extends="org.apache.hadoop.mapreduce.InputSplit" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="DBInputFormat.DBInputSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Default Constructor]]> </doc> </constructor> <constructor name="DBInputFormat.DBInputSplit" type="long, long" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Convenience Constructor @param start the index of the first row to select @param end the index of the last row to select]]> </doc> </constructor> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getStart" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return The index of the first row to select]]> </doc> </method> <method name="getEnd" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return The index of the last row to select]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> 
<![CDATA[@return The total row count in this split]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="input" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="output" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <doc> <![CDATA[A InputSplit that spans a set of rows]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable --> <class name="DBInputFormat.NullDBWritable" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.db.DBWritable"/> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="DBInputFormat.NullDBWritable" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="arg0" type="java.sql.ResultSet"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" 
final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="arg0" type="java.sql.PreparedStatement"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <doc> <![CDATA[A Class that does nothing, implementing DBWritable]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DBInputFormat.NullDBWritable --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat --> <class name="DBOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DBOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="constructQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not 
deprecated"> <param name="table" type="java.lang.String"/> <param name="fieldNames" type="java.lang.String[]"/> <doc> <![CDATA[Constructs the query used as the prepared statement to insert data. @param table the table to insert into @param fieldNames the fields to insert into. If field names are unknown, supply an array of nulls.]]> </doc> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="setOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="tableName" type="java.lang.String"/> <param name="fieldNames" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Initializes the reduce-part of the job with the appropriate output settings @param job The job @param tableName The table to insert data into @param fieldNames The field names in the table.]]> </doc> </method> <method name="setOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="tableName" type="java.lang.String"/> <param name="fieldCount" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Initializes the reduce-part of the job with the appropriate output settings @param job The job @param tableName The table to insert data into @param fieldCount the number of fields in the table.]]> </doc> </method> <doc> <![CDATA[An OutputFormat that sends the reduce 
output to a SQL table. <p> {@link DBOutputFormat} accepts <key,value> pairs, where key has a type extending DBWritable. Returned {@link RecordWriter} writes <b>only the key</b> to the database with a batch SQL query.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter --> <class name="DBOutputFormat.DBRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DBOutputFormat.DBRecordWriter" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> </constructor> <constructor name="DBOutputFormat.DBRecordWriter" type="java.sql.Connection, java.sql.PreparedStatement" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> </constructor> <method name="getConnection" return="java.sql.Connection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getStatement" return="java.sql.PreparedStatement" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> 
<exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <doc> <![CDATA[A RecordWriter that writes the reduce output to a SQL table]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DBOutputFormat.DBRecordWriter --> <!-- start class org.apache.hadoop.mapreduce.lib.db.DBRecordReader --> <class name="DBRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[@param split The InputSplit to read data for @throws SQLException]]> </doc> </constructor> <method name="executeQuery" return="java.sql.ResultSet" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="query" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <method name="getSelectQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Returns the query for selecting the records, subclasses can override this for custom behaviour.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="initialize" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentKey" return="org.apache.hadoop.io.LongWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getCurrentValue" return="T" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="createValue" return="T" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="deprecated, no comment"> <doc> <![CDATA[@deprecated]]> </doc> </method> <method name="getPos" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="deprecated, no comment"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@deprecated]]> </doc> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="Use {@link #nextKeyValue()}"> <param name="key" type="org.apache.hadoop.io.LongWritable"/> <param name="value" type="T"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[@deprecated Use {@link #nextKeyValue()}]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" 
type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getSplit" return="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="getFieldNames" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="getTableName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="getConditions" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="getDBConf" return="org.apache.hadoop.mapreduce.lib.db.DBConfiguration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="getConnection" return="java.sql.Connection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="getStatement" return="java.sql.PreparedStatement" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="setStatement" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> 
<param name="stmt" type="java.sql.PreparedStatement"/> </method> <field name="statement" type="java.sql.PreparedStatement" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[A RecordReader that reads records from a SQL table. Emits LongWritables containing the record number as key and DBWritables as value.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.DBRecordReader --> <!-- start interface org.apache.hadoop.mapreduce.lib.db.DBSplitter --> <interface name="DBSplitter" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="split" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="results" type="java.sql.ResultSet"/> <param name="colName" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[Given a ResultSet containing one record (and already advanced to that record) with two columns (a low value, and a high value, both of the same type), determine a set of splits that span the given values.]]> </doc> </method> <doc> <![CDATA[DBSplitter will generate DBInputSplits to use with DataDrivenDBInputFormat. DataDrivenDBInputFormat needs to interpolate between two values that represent the lowest and highest valued records to import. Depending on the data-type of the column, this requires different behavior. 
DBSplitter implementations should perform this for a data type or family of data types.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.lib.db.DBSplitter --> <!-- start interface org.apache.hadoop.mapreduce.lib.db.DBWritable --> <interface name="DBWritable" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="statement" type="java.sql.PreparedStatement"/> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[Sets the fields of the object in the {@link PreparedStatement}. @param statement the statement that the fields are put into. @throws SQLException]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="resultSet" type="java.sql.ResultSet"/> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[Reads the fields of the object from the {@link ResultSet}. @param resultSet the {@link ResultSet} to get the fields from. @throws SQLException]]> </doc> </method> <doc> <![CDATA[Objects that are read from/written to a database should implement <code>DBWritable</code>. DBWritable, is similar to {@link Writable} except that the {@link #write(PreparedStatement)} method takes a {@link PreparedStatement}, and {@link #readFields(ResultSet)} takes a {@link ResultSet}. <p> Implementations are responsible for writing the fields of the object to PreparedStatement, and reading the fields of the object from the ResultSet. 
<p>Example:</p> If we have the following table in the database : <pre> CREATE TABLE MyTable ( counter INTEGER NOT NULL, timestamp BIGINT NOT NULL, ); </pre> then we can read/write the tuples from/to the table with : <p><pre> public class MyWritable implements Writable, DBWritable { // Some data private int counter; private long timestamp; //Writable#write() implementation public void write(DataOutput out) throws IOException { out.writeInt(counter); out.writeLong(timestamp); } //Writable#readFields() implementation public void readFields(DataInput in) throws IOException { counter = in.readInt(); timestamp = in.readLong(); } public void write(PreparedStatement statement) throws SQLException { statement.setInt(1, counter); statement.setLong(2, timestamp); } public void readFields(ResultSet resultSet) throws SQLException { counter = resultSet.getInt(1); timestamp = resultSet.getLong(2); } } </pre></p>]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.lib.db.DBWritable --> <!-- start class org.apache.hadoop.mapreduce.lib.db.FloatSplitter --> <class name="FloatSplitter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> <constructor name="FloatSplitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="split" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="results" type="java.sql.ResultSet"/> <param name="colName" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <doc> <![CDATA[Implement DBSplitter over floating-point values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.FloatSplitter --> <!-- start class 
org.apache.hadoop.mapreduce.lib.db.IntegerSplitter --> <class name="IntegerSplitter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.db.DBSplitter"/> <constructor name="IntegerSplitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="split" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="results" type="java.sql.ResultSet"/> <param name="colName" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <doc> <![CDATA[Implement DBSplitter over integer values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.IntegerSplitter --> <!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader --> <class name="MySQLDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MySQLDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> </constructor> <method name="executeQuery" return="java.sql.ResultSet" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="query" type="java.lang.String"/> <exception name="SQLException" 
type="java.sql.SQLException"/> </method> <doc> <![CDATA[A RecordReader that reads records from a MySQL table via DataDrivenDBRecordReader]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDataDrivenDBRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader --> <class name="MySQLDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MySQLDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> </constructor> <method name="executeQuery" return="java.sql.ResultSet" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="query" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> </method> <doc> <![CDATA[A RecordReader that reads records from a MySQL table.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.MySQLDBRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat --> <class name="OracleDataDrivenDBInputFormat" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="OracleDataDrivenDBInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getSplitter" return="org.apache.hadoop.mapreduce.lib.db.DBSplitter" 
abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="sqlDataType" type="int"/> <doc> <![CDATA[@return the DBSplitter implementation to use to divide the table/query into InputSplits.]]> </doc> </method> <method name="createDBRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An InputFormat that reads input data from an SQL table in an Oracle db.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader --> <class name="OracleDataDrivenDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DataDrivenDBRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="OracleDataDrivenDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> </constructor> <doc> <![CDATA[A RecordReader that reads records from an Oracle table via DataDrivenDBRecordReader]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter --> <class name="OracleDateSplitter" 
extends="org.apache.hadoop.mapreduce.lib.db.DateSplitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="OracleDateSplitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="dateToString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="d" type="java.util.Date"/> </method> <doc> <![CDATA[Implement DBSplitter over date/time values returned by an Oracle db. Make use of logic from DateSplitter, since this just needs to use some Oracle-specific functions on the formatting end when generating InputSplits.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDateSplitter --> <!-- start class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader --> <class name="OracleDBRecordReader" extends="org.apache.hadoop.mapreduce.lib.db.DBRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="OracleDBRecordReader" type="org.apache.hadoop.mapreduce.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.conf.Configuration, java.sql.Connection, org.apache.hadoop.mapreduce.lib.db.DBConfiguration, java.lang.String, java.lang.String[], java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="SQLException" type="java.sql.SQLException"/> </constructor> <method name="getSelectQuery" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Returns the query for selecting the records from an Oracle DB.]]> </doc> </method> <method name="setSessionTimeZone" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" 
deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="conn" type="java.sql.Connection"/> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[Set session time zone @param conf The current configuration. We read the 'oracle.sessionTimeZone' property from here. @param conn The connection to alter the timezone properties of.]]> </doc> </method> <field name="SESSION_TIMEZONE_KEY" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Configuration key to set to a timezone string.]]> </doc> </field> <doc> <![CDATA[A RecordReader that reads records from an Oracle SQL table.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.OracleDBRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.db.TextSplitter --> <class name="TextSplitter" extends="org.apache.hadoop.mapreduce.lib.db.BigDecimalSplitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TextSplitter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="split" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="results" type="java.sql.ResultSet"/> <param name="colName" type="java.lang.String"/> <exception name="SQLException" type="java.sql.SQLException"/> <doc> <![CDATA[This method needs to determine the splits between two user-provided strings. In the case where the user's strings are 'A' and 'Z', this is not hard; we could create two splits from ['A', 'M') and ['M', 'Z'], 26 splits for strings beginning with each letter, etc. 
If a user has provided us with the strings "Ham" and "Haze", however, we need to create splits that differ in the third letter. The algorithm used is as follows: Since there are 2**16 unicode characters, we interpret characters as digits in base 65536. Given a string 's' containing characters s_0, s_1 .. s_n, we interpret the string as the number: 0.s_0 s_1 s_2.. s_n in base 65536. Having mapped the low and high strings into floating-point values, we then use the BigDecimalSplitter to establish the even split points, then map the resulting floating point values back into strings.]]> </doc> </method> <doc> <![CDATA[Implement DBSplitter over text strings.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.db.TextSplitter --> </package> <package name="org.apache.hadoop.mapreduce.lib.fieldsel"> <!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper --> <class name="FieldSelectionHelper" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FieldSelectionHelper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="FieldSelectionHelper" type="org.apache.hadoop.io.Text, org.apache.hadoop.io.Text" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="parseOutputKeyValueSpec" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="keyValueSpec" type="java.lang.String"/> <param name="keyFieldList" type="java.util.List"/> <param name="valueFieldList" type="java.util.List"/> </method> <method name="specToString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="fieldSeparator" type="java.lang.String"/> <param name="keyValueSpec" 
type="java.lang.String"/> <param name="allValueFieldsFrom" type="int"/> <param name="keyFieldList" type="java.util.List"/> <param name="valueFieldList" type="java.util.List"/> </method> <method name="getKey" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getValue" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="extractOutputKeyValue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.String"/> <param name="val" type="java.lang.String"/> <param name="fieldSep" type="java.lang.String"/> <param name="keyFieldList" type="java.util.List"/> <param name="valFieldList" type="java.util.List"/> <param name="allValueFieldsFrom" type="int"/> <param name="ignoreKey" type="boolean"/> <param name="isMap" type="boolean"/> </method> <field name="emptyText" type="org.apache.hadoop.io.Text" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <field name="DATA_FIELD_SEPERATOR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="MAP_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="REDUCE_OUTPUT_KEY_VALUE_SPEC" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements a mapper/reducer class that can be used to perform field selections in a manner similar to unix cut. 
The input data is treated as fields separated by a user specified separator (the default value is "\t"). The user can specify a list of fields that form the map output keys, and a list of fields that form the map output values. If the inputformat is TextInputFormat, the mapper will ignore the key to the map function, and the fields are from the value only. Otherwise, the fields are the union of those from the key and those from the value. The field separator is under attribute "mapreduce.fieldsel.data.field.separator". The map output field list spec is under attribute "mapreduce.fieldsel.map.output.key.value.fields.spec". The value is expected to be like "keyFieldsSpec:valueFieldsSpec", where key/valueFieldsSpec are comma (,) separated field specs: fieldSpec,fieldSpec,fieldSpec ... Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range (like 2-5) to specify a range of fields, or an open range (like 3-) specifying all the fields starting from field 3. The open range field spec applies to value fields only. It has no effect on the key fields. Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values. The reduce output field list spec is under attribute "mapreduce.fieldsel.reduce.output.key.value.fields.spec". 
The reducer extracts output key/value pairs in a similar manner, except that the key is never ignored.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionHelper --> <!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper --> <class name="FieldSelectionMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FieldSelectionMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="val" type="V"/> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[The identity function. Input key/value pair is written directly to output.]]> </doc> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements a mapper class that can be used to perform field selections in a manner similar to unix cut. The input data is treated as fields separated by a user specified separator (the default value is "\t"). 
The user can specify a list of fields that form the map output keys, and a list of fields that form the map output values. If the inputformat is TextInputFormat, the mapper will ignore the key to the map function, and the fields are from the value only. Otherwise, the fields are the union of those from the key and those from the value. The field separator is under attribute "mapreduce.fieldsel.data.field.separator". The map output field list spec is under attribute "mapreduce.fieldsel.map.output.key.value.fields.spec". The value is expected to be like "keyFieldsSpec:valueFieldsSpec", where key/valueFieldsSpec are comma (,) separated field specs: fieldSpec,fieldSpec,fieldSpec ... Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range (like 2-5) to specify a range of fields, or an open range (like 3-) specifying all the fields starting from field 3. The open range field spec applies to value fields only. It has no effect on the key fields. Here is an example: "4,3,0,1:6,5,1-3,7-". 
It specifies to use fields 4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer --> <class name="FieldSelectionReducer" extends="org.apache.hadoop.mapreduce.Reducer" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FieldSelectionReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="values" type="java.lang.Iterable"/> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class implements a reducer class that can be used to perform field selections in a manner similar to unix cut. The input data is treated as fields separated by a user specified separator (the default value is "\t"). The user can specify a list of fields that form the reduce output keys, and a list of fields that form the reduce output values. 
The fields are the union of those from the key and those from the value. The field separator is under attribute "mapreduce.fieldsel.data.field.separator". The reduce output field list spec is under attribute "mapreduce.fieldsel.reduce.output.key.value.fields.spec". The value is expected to be like "keyFieldsSpec:valueFieldsSpec", where key/valueFieldsSpec are comma (,) separated field specs: fieldSpec,fieldSpec,fieldSpec ... Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range (like 2-5) to specify a range of fields, or an open range (like 3-) specifying all the fields starting from field 3. The open range field spec applies to value fields only. It has no effect on the key fields. Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys, and use fields 6,5,1,2,3,7 and above for values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.fieldsel.FieldSelectionReducer --> </package> <package name="org.apache.hadoop.mapreduce.lib.input"> <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat --> <class name="CombineFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="CombineFileInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[default constructor]]> </doc> </constructor> <method name="setMaxSplitSize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="maxSplitSize" type="long"/> <doc> <![CDATA[Specify the maximum size (in bytes) of each split. 
Each split is approximately equal to the specified size.]]> </doc> </method> <method name="setMinSplitSizeNode" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="minSplitSizeNode" type="long"/> <doc> <![CDATA[Specify the minimum size (in bytes) of each split per node. This applies to data that is left over after combining data on a single node into splits that are of maximum size specified by maxSplitSize. This leftover data will be combined into its own split if its size exceeds minSplitSizeNode.]]> </doc> </method> <method name="setMinSplitSizeRack" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="minSplitSizeRack" type="long"/> <doc> <![CDATA[Specify the minimum size (in bytes) of each split per rack. This applies to data that is left over after combining data on a single rack into splits that are of maximum size specified by maxSplitSize. This leftover data will be combined into its own split if its size exceeds minSplitSizeRack.]]> </doc> </method> <method name="createPool" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="filters" type="java.util.List"/> <doc> <![CDATA[Create a new pool and add the filters to it. A split cannot have files from different pools.]]> </doc> </method> <method name="createPool" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/> <doc> <![CDATA[Create a new pool and add the filters to it. A pathname can satisfy any one of the specified filters. 
A split cannot have files from different pools.]]> </doc> </method> <method name="isSplitable" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="file" type="org.apache.hadoop.fs.Path"/> </method> <method name="getSplits" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[This is not implemented yet.]]> </doc> </method> <field name="SPLIT_MINSIZE_PERNODE" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="SPLIT_MINSIZE_PERRACK" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[An abstract {@link InputFormat} that returns {@link CombineFileSplit}'s in {@link InputFormat#getSplits(JobContext)} method. Splits are constructed from the files under the input paths. A split cannot have files from different pools. Each split returned may contain blocks from different files. If a maxSplitSize is specified, then blocks on the same node are combined to form a single split. 
Blocks that are left over are then combined with other blocks in the same rack. If maxSplitSize is not specified, then blocks from the same rack are combined in a single split; no attempt is made to create node-local splits. If the maxSplitSize is equal to the block size, then this class is similar to the default splitting behavior in Hadoop: each block is a locally processed split. Subclasses implement {@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s. @see CombineFileSplit]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader --> <class name="CombineFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit, org.apache.hadoop.mapreduce.TaskAttemptContext, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[A generic RecordReader that can hand out different recordReaders for each chunk in the CombineFileSplit.]]> </doc> </constructor> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" 
deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentValue" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[return progress based on the amount of data processed so far.]]> </doc> </method> <method name="initNextRecordReader" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]> </doc> </method> <field name="split" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> 
</field> <field name="rrClass" type="java.lang.Class" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="rrConstructor" type="java.lang.reflect.Constructor" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="fs" type="org.apache.hadoop.fs.FileSystem" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="idx" type="int" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="progress" type="long" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="curReader" type="org.apache.hadoop.mapreduce.RecordReader" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[A generic RecordReader that can hand out different recordReaders for each chunk in a {@link CombineFileSplit}. A CombineFileSplit can combine data chunks from multiple files. This class allows using different RecordReaders for processing these data chunks from different files. 
@see CombineFileSplit]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit --> <class name="CombineFileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="CombineFileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[default constructor]]> </doc> </constructor> <constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CombineFileSplit" type="org.apache.hadoop.fs.Path[], long[]" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CombineFileSplit" type="org.apache.hadoop.mapreduce.lib.input.CombineFileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Copy constructor]]> </doc> </constructor> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getStartOffsets" return="long[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns an array containing the start offsets of the files in the split]]> </doc> </method> <method name="getLengths" return="long[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns an array containing the lengths of the files in the split]]> </doc> 
</method> <method name="getOffset" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <doc> <![CDATA[Returns the start offset of the i<sup>th</sup> Path]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <doc> <![CDATA[Returns the length of the i<sup>th</sup> Path]]> </doc> </method> <method name="getNumPaths" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the number of Paths in the split]]> </doc> </method> <method name="getPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <doc> <![CDATA[Returns the i<sup>th</sup> Path]]> </doc> </method> <method name="getPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns all the Paths in the split]]> </doc> </method> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Returns all the Paths where this input-split resides]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="write" 
abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, the CombineFileSplit class does not represent a split of a file, but a split of input files into smaller sets. A split may contain blocks from different files, but all the blocks in the same split are probably local to some rack. <br> CombineFileSplit can be used to implement {@link RecordReader}'s, with reading one record per file. @see FileSplit @see CombineFileInputFormat]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.CombineFileSplit --> <!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormat --> <class name="FileInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FileInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getFormatMinSplitSize" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Get the lower bound on split size imposed by the format. 
@return the number of bytes of the minimal split for this format]]> </doc> </method> <method name="isSplitable" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="filename" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Is the given filename splitable? Usually, true, but if the file is stream compressed, it will not be. <code>FileInputFormat</code> implementations can override this and return <code>false</code> to ensure that individual input files are never split-up so that {@link Mapper}s process entire files. @param context the job context @param filename the file name to check @return is this file splitable?]]> </doc> </method> <method name="setInputPathFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="filter" type="java.lang.Class"/> <doc> <![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job. 
@param job the job to modify @param filter the PathFilter class use for filtering the input paths.]]> </doc> </method> <method name="setMinInputSplitSize" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="size" type="long"/> <doc> <![CDATA[Set the minimum input split size @param job the job to modify @param size the minimum size]]> </doc> </method> <method name="getMinSplitSize" return="long" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the minimum split size @param job the job @return the minimum number of bytes that can be in a split]]> </doc> </method> <method name="setMaxInputSplitSize" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="size" type="long"/> <doc> <![CDATA[Set the maximum split size @param job the job to modify @param size the maximum split size]]> </doc> </method> <method name="getMaxSplitSize" return="long" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the maximum split size. @param context the job to look at. 
@return the maximum number of bytes a split can include]]> </doc> </method> <method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get a PathFilter instance of the filter set for the input paths. @return the PathFilter instance set for the job, NULL if none has been set.]]> </doc> </method> <method name="listStatus" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[List input directories. Subclasses may override to, e.g., select only files matching a regular expression. @param job the job to list input paths for @return array of FileStatus objects @throws IOException if zero items.]]> </doc> </method> <method name="makeSplit" return="org.apache.hadoop.mapreduce.lib.input.FileSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="file" type="org.apache.hadoop.fs.Path"/> <param name="start" type="long"/> <param name="length" type="long"/> <param name="hosts" type="java.lang.String[]"/> <doc> <![CDATA[A factory that makes the split for this class. It can be overridden by sub-classes to make sub-types]]> </doc> </method> <method name="getSplits" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Generate the list of files and make them into FileSplits. 
@param job the job context @throws IOException]]> </doc> </method> <method name="computeSplitSize" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="blockSize" type="long"/> <param name="minSize" type="long"/> <param name="maxSize" type="long"/> </method> <method name="getBlockIndex" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/> <param name="offset" type="long"/> </method> <method name="setInputPaths" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="commaSeparatedPaths" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Sets the given comma separated paths as the list of inputs for the map-reduce job. @param job the job @param commaSeparatedPaths Comma separated paths to be set as the list of inputs for the map-reduce job.]]> </doc> </method> <method name="addInputPaths" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="commaSeparatedPaths" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add the given comma separated paths to the list of inputs for the map-reduce job. 
@param job The job to modify @param commaSeparatedPaths Comma separated paths to be added to the list of inputs for the map-reduce job.]]> </doc> </method> <method name="setInputPaths" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Set the array of {@link Path}s as the list of inputs for the map-reduce job. @param job The job to modify @param inputPaths the {@link Path}s of the input directories/files for the map-reduce job.]]> </doc> </method> <method name="addInputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job. @param job The {@link Job} to modify @param path {@link Path} to be added to the list of inputs for the map-reduce job.]]> </doc> </method> <method name="getInputPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the list of input {@link Path}s for the map-reduce job. 
@param context The job @return the list of input {@link Path}s for the map-reduce job.]]> </doc> </method> <field name="COUNTER_GROUP" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="BYTES_READ" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="INPUT_DIR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="SPLIT_MAXSIZE" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="SPLIT_MINSIZE" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="PATHFILTER_CLASS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="NUM_INPUT_FILES" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[A base class for file-based {@link InputFormat}s. <p><code>FileInputFormat</code> is the base class for all file-based <code>InputFormat</code>s. This provides a generic implementation of {@link #getSplits(JobContext)}. 
Subclasses of <code>FileInputFormat</code> can also override the {@link #isSplitable(JobContext, Path)} method to ensure input-files are not split-up and are processed as a whole by {@link Mapper}s.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.input.FileSplit --> <class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="FileSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructs a split with host information @param file the file name @param start the position of the first byte in the file to process @param length the number of bytes in the file to process @param hosts the list of hosts containing the block, possibly null]]> </doc> </constructor> <method name="getPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The file containing this split's data.]]> </doc> </method> <method name="getStart" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The position of the first byte in the file to process.]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The number of bytes in the file to process.]]> </doc> </method> <method name="toString" return="java.lang.String" 
abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A section of an input file. Returned by {@link InputFormat#getSplits(JobContext)} and passed to {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.FileSplit --> <!-- start class org.apache.hadoop.mapreduce.lib.input.InvalidInputException --> <class name="InvalidInputException" extends="java.io.IOException" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="InvalidInputException" type="java.util.List" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create the exception with the given list. @param probs the list of problems to report. this list is not copied.]]> </doc> </constructor> <method name="getProblems" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the complete list of the problems reported. 
@return the list of problems, which must not be modified]]> </doc> </method> <method name="getMessage" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get a summary message of the problems found. @return the concatenated messages from all of the problems.]]> </doc> </method> <doc> <![CDATA[This class wraps a list of problems with the input, so that the user can get a list of problems together instead of finding and fixing them one by one.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.InvalidInputException --> <!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader --> <class name="KeyValueLineRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="findSeparator" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="utf" type="byte[]"/> <param name="start" type="int"/> <param name="length" type="int"/> <param name="sep" type="byte"/> </method> <method name="setKeyValue" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" 
deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.Text"/> <param name="value" type="org.apache.hadoop.io.Text"/> <param name="line" type="byte[]"/> <param name="lineLen" type="int"/> <param name="pos" type="int"/> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Read key/value pair in a line.]]> </doc> </method> <method name="getCurrentKey" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getCurrentValue" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <field name="KEY_VALUE_SEPERATOR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class treats a line in the input as a key/value pair separated by a separator character. The separator can be specified in config file under the attribute name mapreduce.input.keyvaluelinerecordreader.key.value.separator. 
The default separator is the tab character ('\t').]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat --> <class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="KeyValueTextInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="isSplitable" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="file" type="org.apache.hadoop.fs.Path"/> </method> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. Either line feed or carriage-return are used to signal end of line. Each line is divided into key and value parts by a separator byte. 
If no such a byte exists, the key will be the entire line and value will be empty.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.input.MultipleInputs --> <class name="MultipleInputs" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MultipleInputs" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="addInputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="inputFormatClass" type="java.lang.Class"/> <doc> <![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of inputs for the map-reduce job. @param job The {@link Job} @param path {@link Path} to be added to the list of inputs for the job @param inputFormatClass {@link InputFormat} class to use for this path]]> </doc> </method> <method name="addInputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="inputFormatClass" type="java.lang.Class"/> <param name="mapperClass" type="java.lang.Class"/> <doc> <![CDATA[Add a {@link Path} with a custom {@link InputFormat} and {@link Mapper} to the list of inputs for the map-reduce job. 
@param job The {@link Job} @param path {@link Path} to be added to the list of inputs for the job @param inputFormatClass {@link InputFormat} class to use for this path @param mapperClass {@link Mapper} class to use for this path]]> </doc> </method> <field name="DIR_FORMATS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="DIR_MAPPERS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class supports MapReduce jobs that have multiple input paths with a different {@link InputFormat} and {@link Mapper} for each path]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.MultipleInputs --> <!-- start class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat --> <class name="NLineInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="NLineInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getSplits" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Logically 
splits the set of input files for the job, splits N lines of the input as one split. @see FileInputFormat#getSplits(JobContext)]]> </doc> </method> <method name="getSplitsForFile" return="java.util.List" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="status" type="org.apache.hadoop.fs.FileStatus"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="numLinesPerSplit" type="int"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setNumLinesPerSplit" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="numLines" type="int"/> <doc> <![CDATA[Set the number of lines per split @param job the job to modify @param numLines the number of lines per split]]> </doc> </method> <method name="getNumLinesPerSplit" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the number of lines per split @param job the job @return the number of lines per split]]> </doc> </method> <field name="LINES_PER_MAP" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[NLineInputFormat which splits N lines of input as one split. In many "pleasantly" parallel applications, each process/mapper processes the same input file (s), but with computations are controlled by different parameters.(Referred to as "parameter sweeps"). 
One way to achieve this, is to specify a set of parameters (one set per line) as input in a control file (which is the input path to the map-reduce application, where as the input dataset is specified via a config variable in JobConf.). The NLineInputFormat can be used in such applications, that splits the input file such that by default, one line is fed as a value to one map task, and key is the offset. i.e. (k,v) is (LongWritable, Text). The location hints will span the whole mapred cluster.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.NLineInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat --> <class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileAsBinaryInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw) format.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader --> <class name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" abstract="false" static="true" final="false" visibility="public" 
deprecated="not deprecated"> <constructor name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentKey" return="org.apache.hadoop.io.BytesWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentValue" return="org.apache.hadoop.io.BytesWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getKeyClassName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Retrieve the name of the key class for this SequenceFile. @see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName]]> </doc> </method> <method name="getValueClassName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Retrieve the name of the value class for this SequenceFile. 
@see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName]]> </doc> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Read raw bytes from a SequenceFile.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Return the progress within the input split @return 0.0 to 1.0 of the input byte range]]> </doc> </method> <doc> <![CDATA[Read records from a SequenceFile as binary (raw) bytes.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat --> <class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileAsTextInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param 
name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[This class is similar to SequenceFileInputFormat, except it generates SequenceFileAsTextRecordReader which converts the input keys and values to their String forms by calling toString() method.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader --> <class name="SequenceFileAsTextRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileAsTextRecordReader" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentKey" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentValue" return="org.apache.hadoop.io.Text" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> 
<exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Read key/value pair in a line.]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[This class converts the input keys and values to their String forms by calling toString() method. 
This class to SequenceFileAsTextInputFormat class is as LineRecordReader class to TextInputFormat class.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter --> <class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create a record reader for the given split @param split file split @param context the task-attempt context @return RecordReader]]> </doc> </method> <method name="setFilterClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="filterClass" type="java.lang.Class"/> <doc> <![CDATA[set the filter class @param job The job @param filterClass filter class]]> </doc> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="FILTER_CLASS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="FILTER_FREQUENCY" type="java.lang.String" 
transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="FILTER_REGEX" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[A class that allows a map/red job to work on a sample of sequence files. The sample is decided by the filter class set by the job.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter --> <!-- start interface org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter --> <interface name="SequenceFileInputFilter.Filter" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <doc> <![CDATA[filter function Decide if a record should be filtered or not @param key record key @return true if a record is accepted; return false otherwise]]> </doc> </method> <doc> <![CDATA[filter interface]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase --> <class name="SequenceFileInputFilter.FilterBase" extends="java.lang.Object" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.Filter"/> <constructor name="SequenceFileInputFilter.FilterBase" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" 
static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[base class for Filters]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.MD5Filter --> <class name="SequenceFileInputFilter.MD5Filter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFilter.MD5Filter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setFrequency" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="frequency" type="int"/> <doc> <![CDATA[set the filtering frequency in configuration @param conf configuration @param frequency filtering frequency]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[configure the filter according to configuration @param conf configuration]]> </doc> </method> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <doc> <![CDATA[Filtering method If MD5(key) % frequency==0, return true; otherwise return false @see Filter#accept(Object)]]> </doc> </method> <field name="MD5_LEN" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class returns a set of records by 
examining the MD5 digest of its key against a filtering frequency <i>f</i>. The filtering criteria is MD5(key) % f == 0.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.MD5Filter --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.PercentFilter --> <class name="SequenceFileInputFilter.PercentFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFilter.PercentFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setFrequency" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="frequency" type="int"/> <doc> <![CDATA[set the frequency and stores it in conf @param conf configuration @param frequency filtering frequency]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[configure the filter by checking the configuration @param conf configuration]]> </doc> </method> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <doc> <![CDATA[Filtering method If record# % frequency==0, return true; otherwise return false @see Filter#accept(Object)]]> </doc> </method> <doc> <![CDATA[This class returns a percentage of records The percentage is determined by a filtering frequency <i>f</i> using the criteria record# % f == 0. 
For example, if the frequency is 10, one out of 10 records is returned.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.PercentFilter --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.RegexFilter --> <class name="SequenceFileInputFilter.RegexFilter" extends="org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.FilterBase" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFilter.RegexFilter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setPattern" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="regex" type="java.lang.String"/> <exception name="PatternSyntaxException" type="java.util.regex.PatternSyntaxException"/> <doc> <![CDATA[Define the filtering regex and stores it in conf @param conf where the regex is set @param regex regex used as a filter]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[configure the Filter by checking the configuration]]> </doc> </method> <method name="accept" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <doc> <![CDATA[Filtering method If key matches the regex, return true; otherwise return false @see Filter#accept(Object)]]> </doc> </method> <doc> <![CDATA[Records filter by matching key to regex]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFilter.RegexFilter --> <!-- 
start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat --> <class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getFormatMinSplitSize" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="listStatus" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader --> <class name="SequenceFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileRecordReader" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="initialize" abstract="false" native="false" 
synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getCurrentValue" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return the progress within the input split @return 0.0 to 1.0 of the input byte range]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <field name="conf" type="org.apache.hadoop.conf.Configuration" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[An {@link RecordReader} for {@link SequenceFile}s.]]> </doc> </class> <!-- end class 
org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.input.TextInputFormat --> <class name="TextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TextInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> </method> <method name="isSplitable" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="file" type="org.apache.hadoop.fs.Path"/> </method> <doc> <![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines. Either linefeed or carriage-return are used to signal end of line. 
Keys are the position in the file, and values are the line of text.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.input.TextInputFormat --> </package> <package name="org.apache.hadoop.mapreduce.lib.jobcontrol"> <!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob --> <class name="ControlledJob" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ControlledJob" type="org.apache.hadoop.mapreduce.Job, java.util.List" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Construct a job. @param job a mapreduce job to be executed. @param dependingJobs a list of jobs the current job depends on]]> </doc> </constructor> <constructor name="ControlledJob" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Construct a job. @param conf mapred job configuration representing a job to be executed. @throws IOException]]> </doc> </constructor> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the job name of this job]]> </doc> </method> <method name="setJobName" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobName" type="java.lang.String"/> <doc> <![CDATA[Set the job name for this job. 
@param jobName the job name]]> </doc> </method> <method name="getJobID" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the job ID of this job assigned by JobControl]]> </doc> </method> <method name="setJobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="id" type="java.lang.String"/> <doc> <![CDATA[Set the job ID for this job. @param id the job ID]]> </doc> </method> <method name="getMapredJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the mapred ID of this job as assigned by the mapred framework.]]> </doc> </method> <method name="getJob" return="org.apache.hadoop.mapreduce.Job" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the mapreduce job]]> </doc> </method> <method name="setJob" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <doc> <![CDATA[Set the mapreduce job @param job the mapreduce job for this job.]]> </doc> </method> <method name="getJobState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the state of this job]]> </doc> </method> <method name="setJobState" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="state" 
type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State"/> <doc> <![CDATA[Set the state for this job. @param state the new state for this job.]]> </doc> </method> <method name="getMessage" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the message of this job]]> </doc> </method> <method name="setMessage" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="message" type="java.lang.String"/> <doc> <![CDATA[Set the message for this job. @param message the message for this job.]]> </doc> </method> <method name="getDependentJobs" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the depending jobs of this job]]> </doc> </method> <method name="addDependingJob" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="dependingJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/> <doc> <![CDATA[Add a job to this job's dependency list. Dependent jobs can only be added while a Job is waiting to run, not during or afterwards. @param dependingJob Job that this Job depends on. 
@return <tt>true</tt> if the Job was added.]]> </doc> </method> <method name="isCompleted" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return true if this job is in a complete state]]> </doc> </method> <method name="isReady" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return true if this job is in READY state]]> </doc> </method> <method name="killJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="submit" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Submit this job to mapred. The state becomes RUNNING if submission is successful, FAILED otherwise.]]> </doc> </method> <field name="CREATE_DIR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This class encapsulates a MapReduce job and its dependency. It monitors the states of the depending jobs and updates the state of this job. A job starts in the WAITING state. If it does not have any depending jobs, or all of the depending jobs are in SUCCESS state, then the job state will become READY. If any depending jobs fail, the job will fail too. When in READY state, the job can be submitted to Hadoop for execution, with the state changing into RUNNING state. 
From RUNNING state, the job can get into SUCCESS or FAILED state, depending on the status of the job execution.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob --> <!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State --> <class name="ControlledJob.State" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob.State --> <!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl --> <class name="JobControl" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.lang.Runnable"/> <constructor name="JobControl" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a job control for a group of jobs. 
@param groupName a name identifying this group]]> </doc> </constructor> <method name="getWaitingJobList" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the waiting state]]> </doc> </method> <method name="getRunningJobList" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the running state]]> </doc> </method> <method name="getReadyJobsList" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the ready state]]> </doc> </method> <method name="getSuccessfulJobList" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the jobs in the success state]]> </doc> </method> <method name="getFailedJobList" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="addJob" return="java.lang.String" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="aJob" type="org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob"/> <doc> <![CDATA[Add a new job. 
@param aJob the new job]]> </doc> </method> <method name="addJobCollection" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jobs" type="java.util.Collection"/> <doc> <![CDATA[Add a collection of jobs @param jobs]]> </doc> </method> <method name="getThreadState" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return the thread state]]> </doc> </method> <method name="stop" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[set the thread state to STOPPING so that the thread will stop when it wakes up.]]> </doc> </method> <method name="suspend" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[suspend the running thread]]> </doc> </method> <method name="resume" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[resume the suspended thread]]> </doc> </method> <method name="allFinished" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The main loop for the thread. The loop does the following: Check the states of the running jobs Update the states of waiting jobs Submit the jobs in ready state]]> </doc> </method> <doc> <![CDATA[This class encapsulates a set of MapReduce jobs and its dependency. 
It tracks the states of the jobs by placing them into different tables according to their states. This class provides APIs for the client app to add a job to the group and to get the jobs in the group in different states. When a job is added, an ID unique to the group is assigned to the job. This class has a thread that submits jobs when they become ready, monitors the states of the running jobs, and updates the states of jobs based on the state changes of their depending jobs states. The class provides APIs for suspending/resuming the thread, and for stopping the thread.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl --> <!-- start class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState --> <class name="JobControl.ThreadState" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl.ThreadState --> </package> <package name="org.apache.hadoop.mapreduce.lib.join"> <!-- start class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator --> <class name="ArrayListBackedIterator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> <constructor name="ArrayListBackedIterator" static="false" final="false" 
visibility="public" deprecated="not deprecated"> </constructor> <constructor name="ArrayListBackedIterator" type="java.util.ArrayList" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="X"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="X"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="item" type="X"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[This class provides an implementation of ResetableIterator. The implementation uses an {@link java.util.ArrayList} to store elements added to it, replaying them as requested. 
Prefer {@link StreamBackedIterator}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.ArrayListBackedIterator --> <!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat --> <class name="ComposableInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ComposableInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <doc> <![CDATA[Refinement of InputFormat requiring implementors to provide ComposableRecordReader instead of RecordReader.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader --> <class name="ComposableRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.lang.Comparable"/> <constructor name="ComposableRecordReader" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[Additional operations required of a RecordReader to participate in a join.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat --> <class 
name="CompositeInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="CompositeInputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setFormat" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Interpret a given string as a composite expression. {@code func ::= <ident>([<func>,]*<func>) func ::= tbl(<class>,"<path>") class ::= @see java.lang.Class#forName(java.lang.String) path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) } Reads expression from the <tt>mapreduce.join.expr</tt> property and user-supplied join types from <tt>mapreduce.join.define.<ident></tt> types. Paths supplied to <tt>tbl</tt> are given as input paths to the InputFormat class listed. 
@see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]> </doc> </method> <method name="addDefaults" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Adds the default set of identifiers to the parser.]]> </doc> </method> <method name="getSplits" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the ith split from each child to the ith composite split.]]> </doc> </method> <method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Construct a CompositeRecordReader for the children of this InputFormat as defined in the init expression. The outermost join need only be composable, not necessarily a composite. Mandating TupleWritable isn't strictly correct.]]> </doc> </method> <method name="compose" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="inf" type="java.lang.Class"/> <param name="path" type="java.lang.String"/> <doc> <![CDATA[Convenience method for constructing composite formats. 
Given InputFormat class (inf), path (p) return: {@code tbl(<inf>, <p>) }]]> </doc> </method> <method name="compose" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="op" type="java.lang.String"/> <param name="inf" type="java.lang.Class"/> <param name="path" type="java.lang.String[]"/> <doc> <![CDATA[Convenience method for constructing composite formats. Given operation (op), Object class (inf), set of paths (p) return: {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> </doc> </method> <method name="compose" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="op" type="java.lang.String"/> <param name="inf" type="java.lang.Class"/> <param name="path" type="org.apache.hadoop.fs.Path[]"/> <doc> <![CDATA[Convenience method for constructing composite formats. Given operation (op), Object class (inf), set of paths (p) return: {@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]> </doc> </method> <field name="JOIN_EXPR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="JOIN_COMPARATOR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[An InputFormat capable of performing joins over a set of data sources sorted and partitioned the same way. @see #setFormat A user may define new join types by setting the property <tt>mapreduce.join.define.<ident></tt> to a classname. In the expression <tt>mapreduce.join.expr</tt>, the identifier will be assumed to be a ComposableRecordReader. <tt>mapreduce.join.keycomparator</tt> can be a classname used to compare keys in the join. 
@see JoinRecordReader @see MultiFilterRecordReader]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit --> <class name="CompositeInputSplit" extends="org.apache.hadoop.mapreduce.InputSplit" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <constructor name="CompositeInputSplit" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CompositeInputSplit" type="int" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="s" type="org.apache.hadoop.mapreduce.InputSplit"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Add an InputSplit to this collection. 
@throws IOException If capacity was not specified during construction or if capacity has been reached.]]> </doc> </method> <method name="get" return="org.apache.hadoop.mapreduce.InputSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <doc> <![CDATA[Get ith child InputSplit.]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Return the aggregate length of all child InputSplits currently added.]]> </doc> </method> <method name="getLength" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the length of ith child InputSplit.]]> </doc> </method> <method name="getLocations" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Collect a set of hosts from all child InputSplits.]]> </doc> </method> <method name="getLocation" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[getLocations from ith InputSplit.]]> </doc> </method> <method 
name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Write splits in the following format. {@code <count><class1><class2>...<classn><split1><split2>...<splitn> }]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc} @throws IOException If the child InputSplit cannot be read, typically for failing access checks.]]> </doc> </method> <doc> <![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted into this collection must have a public default constructor.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeInputSplit --> <!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader --> <class name="CompositeRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="CompositeRecordReader" type="int, int, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create a RecordReader with <tt>capacity</tt> children to position <tt>id</tt> in the parent reader. 
The id of a root CompositeRecordReader is -1 by convention, but relying on this is not recommended.]]> </doc> </constructor> <method name="combine" return="boolean" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="value" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> </method> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="id" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the position in the collector this class occupies.]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="getRecordReaderQueue" return="java.util.PriorityQueue" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Return sorted list of RecordReaders for this composite.]]> </doc> </method> <method name="getComparator" 
return="org.apache.hadoop.io.WritableComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Return comparator defining the ordering for RecordReaders in this composite.]]> </doc> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="rr" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Add a RecordReader to this collection. The id() of a RecordReader determines where in the Tuple its entry will appear. Adding RecordReaders with the same id has undefined behavior.]]> </doc> </method> <method name="key" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the key for the current join or the value at the top of the RecordReader heap.]]> </doc> </method> <method name="key" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Clone the key at the top of this RR into the given object.]]> </doc> </method> <method name="getCurrentKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return true if it is possible that this could emit more values.]]> </doc> </method> <method name="skip" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Pass skip key to child RRs.]]> </doc> </method> <method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Obtain an iterator over the child RRs apropos of the value type ultimately emitted from this join.]]> </doc> </method> <method name="accept" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[If key provided matches that of this Composite, give JoinCollector iterator over values it may emit.]]> </doc> </method> <method name="fillJoinCollector" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="iterkey" type="K"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[For all child RRs offering the key provided, obtain an iterator at that position in the JoinCollector.]]> </doc> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/> <doc> 
<![CDATA[Implement Comparable contract (compare key of join or head of heap with that of another).]]> </doc> </method> <method name="createKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Create a new key common to all child RRs. @throws ClassCastException if key classes differ.]]> </doc> </method> <method name="createTupleWritable" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Create a value to be used internally for joins.]]> </doc> </method> <method name="getCurrentValue" return="X" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close all child RRs.]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Report progress as the minimum of all child RR progress.]]> </doc> </method> <field name="conf" type="org.apache.hadoop.conf.Configuration" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="keyclass" type="java.lang.Class" 
transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="jc" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector" transient="false" volatile="false" static="false" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="kids" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader[]" transient="false" volatile="false" static="false" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="key" type="K" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="value" type="X" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key type and partitioning.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector --> <class name="CompositeRecordReader.JoinCollector" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="CompositeRecordReader.JoinCollector" type="int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a collector capable of handling the specified number of children.]]> </doc> </constructor> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="id" type="int"/> <param name="i" type="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Register a given iterator at position id.]]> </doc> </method> 
<method name="key" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the key associated with this collection.]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <doc> <![CDATA[Codify the contents of the collector to be iterated over. When this is called, all RecordReaders registered for this key should have added ResetableIterators.]]> </doc> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Clear all state information.]]> </doc> </method> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns false if exhausted or if reset(K) has not been called.]]> </doc> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="val" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Populate Tuple from iterators. 
It should be the case that, given iterators i_1...i_n over values from sources s_1...s_n sharing key k, repeated calls to next should yield I x I.]]> </doc> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Replay the last Tuple emitted.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close all child iterators.]]> </doc> </method> <method name="flush" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="value" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Write the next value into key, value as accepted by the operation associated with this set of RecordReaders.]]> </doc> </method> <doc> <![CDATA[Collector for join values. This accumulates values for a given key from the child RecordReaders. 
If one or more child RR contain duplicate keys, this will emit the cross product of the associated values until exhausted.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector --> <!-- start class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader --> <class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="combine" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <doc> <![CDATA[Return true iff the tuple is full (all data sources contain this key).]]> </doc> </method> <doc> <![CDATA[Full inner join.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.InnerJoinRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader --> <class name="JoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="JoinRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Emit the next set of key, value pairs as defined by the child RecordReaders and operation 
associated with this composite RR.]]> </doc> </method> <method name="createValue" return="org.apache.hadoop.mapreduce.lib.join.TupleWritable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Return an iterator wrapping the JoinCollector.]]> </doc> </method> <doc> <![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader.JoinDelegationIterator --> <class name="JoinRecordReader.JoinDelegationIterator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> <constructor name="JoinRecordReader.JoinDelegationIterator" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" 
type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="item" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Since the JoinCollector is effecting our operation, we need only provide an iterator proxy wrapping its operation.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.JoinRecordReader.JoinDelegationIterator --> <!-- start class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader --> <class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.conf.Configuration, int, java.lang.Class" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="emit" return="V" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="dst" 
type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[For each tuple emitted, return a value (typically one of the values in the tuple). Modifying the Writables in the tuple is permitted and unlikely to affect join behavior in most cases, but it is not recommended. It's safer to clone first.]]> </doc> </method> <method name="combine" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <doc> <![CDATA[Default implementation offers {@link #emit} every Tuple from the collector (the outer join of child RRs).]]> </doc> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getDelegate" return="org.apache.hadoop.mapreduce.lib.join.ResetableIterator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Return an iterator returning a single value from the tuple. 
@see MultiFilterDelegationIterator]]> </doc> </method> <doc> <![CDATA[Base class for Composite join returning values derived from multiple sources, but generally not tuples.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader.MultiFilterDelegationIterator --> <class name="MultiFilterRecordReader.MultiFilterDelegationIterator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> <constructor name="MultiFilterRecordReader.MultiFilterDelegationIterator" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="item" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Proxy the JoinCollector, but include callback to emit.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader.MultiFilterDelegationIterator --> <!-- start class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader --> <class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.JoinRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="combine" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="srcs" type="java.lang.Object[]"/> <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <doc> <![CDATA[Emit everything from the collector.]]> </doc> </method> <doc> <![CDATA[Full outer join.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.OuterJoinRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader --> <class name="OverrideRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.MultiFilterRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="emit" return="V" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="dst" type="org.apache.hadoop.mapreduce.lib.join.TupleWritable"/> <doc> <![CDATA[Emit the value with the highest position in the tuple.]]> </doc> </method> <method name="createValue" return="V" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="fillJoinCollector" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="iterkey" type="K"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Instead of filling the JoinCollector with iterators from all data sources, fill only the rightmost for this key. This not only saves space by discarding the other sources, but it also emits the number of key-value pairs in the preferred RecordReader instead of repeating that stream n times, where n is the cardinality of the cross product of the discarded streams for the given key.]]> </doc> </method> <doc> <![CDATA[Prefer the "rightmost" data source for this key. For example, <tt>override(S1,S2,S3)</tt> will prefer values from S3 over S2, and values from S2 over S1 for all keys emitted from all sources.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.OverrideRecordReader --> <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser --> <class name="Parser" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Parser" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <doc> <![CDATA[Very simple shift-reduce parser for join expressions. This should be sufficient for the user extension permitted now, but ought to be replaced with a parser generator if more complex grammars are supported. In particular, this "shift-reduce" parser has no states. Each set of formals requires a different internal node type, which is responsible for interpreting the list of tokens it receives. 
This is sufficient for the current grammar, but it has several annoying properties that might inhibit extension. In particular, parenthesis are always function calls; an algebraic or filter grammar would not only require a node type, but must also work around the internals of this parser. For most other cases, adding classes to the hierarchy- particularly by extending JoinRecordReader and MultiFilterRecordReader- is fairly straightforward. One need only override the relevant method(s) (usually only {@link CompositeRecordReader#combine}) and include a property to map its value to an identifier in the parser.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser --> <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Node --> <class name="Parser.Node" extends="org.apache.hadoop.mapreduce.lib.join.ComposableInputFormat" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Parser.Node" type="java.lang.String" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="addIdentifier" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <param name="ident" type="java.lang.String"/> <param name="mcstrSig" type="java.lang.Class[]"/> <param name="nodetype" type="java.lang.Class"/> <param name="cl" type="java.lang.Class"/> <exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/> <doc> <![CDATA[For a given identifier, add a mapping to the nodetype for the parse tree and to the ComposableRecordReader to be created, including the formals required to invoke the constructor. 
The nodetype and constructor signature should be filled in from the child node.]]> </doc> </method> <method name="setID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="id" type="int"/> </method> <method name="setKeyComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="cmpcl" type="java.lang.Class"/> </method> <field name="rrCstrMap" type="java.util.Map" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="id" type="int" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="ident" type="java.lang.String" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="cmpcl" type="java.lang.Class" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Node --> <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken --> <class name="Parser.NodeToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NodeToken --> <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken --> <class name="Parser.NumToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token" abstract="false" 
static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Parser.NumToken" type="double" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getNum" return="double" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.NumToken --> <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken --> <class name="Parser.StrToken" extends="org.apache.hadoop.mapreduce.lib.join.Parser.Token" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Parser.StrToken" type="org.apache.hadoop.mapreduce.lib.join.Parser.TType, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getStr" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.StrToken --> <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.Token --> <class name="Parser.Token" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <method name="getType" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNode" return="org.apache.hadoop.mapreduce.lib.join.Parser.Node" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getNum" return="double" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getStr" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Tagged-union type for tokens from the join expression. @see Parser.TType]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.Token --> <!-- start class org.apache.hadoop.mapreduce.lib.join.Parser.TType --> <class name="Parser.TType" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.lib.join.Parser.TType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.Parser.TType --> <!-- start interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator --> <interface name="ResetableIterator" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[True if a call to next may return a value. 
This is permitted false positives, but not false negatives.]]> </doc> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="T"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Assign next value to actual. It is required that elements added to a ResetableIterator be returned in the same order after a call to {@link #reset} (FIFO). Note that a call to this may fail for nested joins (i.e. more elements available, but none satisfying the constraints of the join)]]> </doc> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="T"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Assign last value returned to actual.]]> </doc> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Set iterator to return to the start of its range. Must be called after calling {@link #add} to avoid a ConcurrentModificationException.]]> </doc> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="item" type="T"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Add an element to the collection of elements to iterate over.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Close datasources and release resources. 
Calling methods on the iterator after calling close has undefined behavior.]]> </doc> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Close datasources, but do not release internal resources. Calling this method should permit the object to be reused with a different datasource.]]> </doc> </method> <doc> <![CDATA[This defines an interface to a stateful Iterator that can replay elements added to it directly. Note that this does not extend {@link java.util.Iterator}.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.lib.join.ResetableIterator --> <!-- start class org.apache.hadoop.mapreduce.lib.join.ResetableIterator.EMPTY --> <class name="ResetableIterator.EMPTY" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> <constructor name="ResetableIterator.EMPTY" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="reset" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" 
visibility="public" deprecated="not deprecated"> <param name="val" type="U"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="U"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="item" type="U"/> <exception name="IOException" type="java.io.IOException"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.ResetableIterator.EMPTY --> <!-- start class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator --> <class name="StreamBackedIterator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.join.ResetableIterator"/> <constructor name="StreamBackedIterator" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="next" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="X"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="replay" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="val" type="X"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="reset" abstract="false" native="false" synchronized="false" 
static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="add" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="item" type="X"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="clear" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[This class provides an implementation of ResetableIterator. This implementation uses a byte array to store elements added to it.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.StreamBackedIterator --> <!-- start class org.apache.hadoop.mapreduce.lib.join.TupleWritable --> <class name="TupleWritable" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.Writable"/> <implements name="java.lang.Iterable"/> <constructor name="TupleWritable" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create an empty tuple with no allocated storage for writables.]]> </doc> </constructor> <constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Initialize tuple with storage; unknown whether any of them contain "written" values.]]> </doc> </constructor> <method name="has" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <doc> <![CDATA[Return true if tuple has 
an element at the position provided.]]> </doc> </method> <method name="get" return="org.apache.hadoop.io.Writable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="int"/> <doc> <![CDATA[Get ith Writable from Tuple.]]> </doc> </method> <method name="size" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[The number of children in this Tuple.]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="java.lang.Object"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="iterator" return="java.util.Iterator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return an iterator over the elements in this tuple. Note that this doesn't flatten the tuple; one may receive tuples from this iterator.]]> </doc> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Convert Tuple to String as in the following. <tt>[<child1>,<child2>,...,<childn>]</tt>]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="out" type="java.io.DataOutput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Writes each Writable to <code>out</code>. 
TupleWritable format: {@code <count><type1><type2>...<typen><obj1><obj2>...<objn> }]]> </doc> </method> <method name="readFields" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="in" type="java.io.DataInput"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <field name="written" type="java.util.BitSet" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s. This is *not* a general-purpose tuple type. In almost all cases, users are encouraged to implement their own serializable types, which can perform better validation and provide more efficient encodings than this class is capable. TupleWritable relies on the join framework for type safety and assumes its instances will rarely be persisted, assumptions not only incompatible with, but contrary to the general case. 
@see org.apache.hadoop.io.Writable]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.TupleWritable --> <!-- start class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader --> <class name="WrappedRecordReader" extends="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="WrappedRecordReader" type="int" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="createKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Request new key from proxied RR.]]> </doc> </method> <method name="createValue" return="U" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="id" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[{@inheritDoc}]]> </doc> </method> <method name="key" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return the key at the head of this RR.]]> </doc> </method> <method name="key" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not 
deprecated"> <param name="qkey" type="K"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Clone the key at the head of this RR into the object supplied.]]> </doc> </method> <method name="hasNext" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Return true if the RR- including the k,v pair stored in this object- is exhausted.]]> </doc> </method> <method name="skip" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]> </doc> </method> <method name="accept" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="i" type="org.apache.hadoop.mapreduce.lib.join.CompositeRecordReader.JoinCollector"/> <param name="key" type="K"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Add an iterator to the collector at the position occupied by this RecordReader over the values in this stream paired with the key provided (ie register a stream of values from this source matching K with a collector).]]> </doc> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Read the next k,v pair into the head of this object; return true iff the RR and this 
are exhausted.]]> </doc> </method> <method name="getCurrentKey" return="K" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get current key]]> </doc> </method> <method name="getCurrentValue" return="U" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get current value]]> </doc> </method> <method name="getProgress" return="float" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Request progress from proxied RR.]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Forward close request to proxied RR.]]> </doc> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="org.apache.hadoop.mapreduce.lib.join.ComposableRecordReader"/> <doc> <![CDATA[Implement Comparable contract (compare key at head of proxied RR with that of another).]]> </doc> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" 
type="java.lang.Object"/> <doc> <![CDATA[Return true iff compareTo(other) returns true.]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <field name="empty" type="boolean" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="cmp" type="org.apache.hadoop.io.WritableComparator" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[Proxy class for a RecordReader participating in the join framework. This class keeps track of the "head" key-value pair for the provided RecordReader and keeps a store of values matching a key when this source is participating in a join.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.join.WrappedRecordReader --> </package> <package name="org.apache.hadoop.mapreduce.lib.map"> <!-- start class org.apache.hadoop.mapreduce.lib.map.InverseMapper --> <class name="InverseMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="InverseMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[The inverse function. 
Input keys and values are swapped.]]> </doc> </method> <doc> <![CDATA[A {@link Mapper} that swaps keys and values.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.map.InverseMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper --> <class name="MultithreadedMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MultithreadedMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getNumberOfThreads" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[The number of threads in the thread pool that will run the map function. @param job the job @return the number of threads]]> </doc> </method> <method name="setNumberOfThreads" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="threads" type="int"/> <doc> <![CDATA[Set the number of threads in the pool for running maps. @param job the job to modify @param threads the new number of threads]]> </doc> </method> <method name="getMapperClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the application's mapper class. 
@param <K1> the map's input key type @param <V1> the map's input value type @param <K2> the map's output key type @param <V2> the map's output value type @param job the job @return the mapper class to run]]> </doc> </method> <method name="setMapperClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="cls" type="java.lang.Class"/> <doc> <![CDATA[Set the application's mapper class. @param <K1> the map input key type @param <V1> the map input value type @param <K2> the map output key type @param <V2> the map output value type @param job the job to modify @param cls the class to use as the mapper]]> </doc> </method> <method name="run" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Run the application's maps using a thread pool.]]> </doc> </method> <field name="NUM_THREADS" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <field name="MAP_CLASS" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapreduce.Mapper}. <p> It can be used instead of the default implementation, {@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU bound in order to improve throughput. <p> Mapper implementations using this MapRunnable must be thread-safe. 
<p> The Map-Reduce job has to be configured with the mapper to use via {@link #setMapperClass(Job, Class)} and the number of threads the thread-pool can use with the {@link #setNumberOfThreads(Job, int)} method. The default value is 10 threads. <p>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.map.RegexMapper --> <class name="RegexMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="RegexMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setup" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="org.apache.hadoop.io.Text"/> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <field name="PATTERN" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <field name="GROUP" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.map.RegexMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper --> <class 
name="TokenCounterMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TokenCounterMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="java.lang.Object"/> <param name="value" type="org.apache.hadoop.io.Text"/> <param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <doc> <![CDATA[Tokenize the input values and emit each word with a count of 1.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.map.WrappedMapper --> <class name="WrappedMapper" extends="org.apache.hadoop.mapreduce.Mapper" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="WrappedMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getMapContext" return="org.apache.hadoop.mapreduce.Mapper.Context" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="mapContext" type="org.apache.hadoop.mapreduce.MapContext"/> <doc> <![CDATA[Get a wrapped {@link Mapper.Context} for custom implementations. 
@param mapContext <code>MapContext</code> to be wrapped @return a wrapped <code>Mapper.Context</code> for custom implementations]]> </doc> </method> <doc> <![CDATA[A {@link Mapper} which wraps a given one to allow custom {@link Mapper.Context} implementations.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.map.WrappedMapper --> <!-- start class org.apache.hadoop.mapreduce.lib.map.WrappedMapper.Context --> <class name="WrappedMapper.Context" extends="org.apache.hadoop.mapreduce.Mapper.Context" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="WrappedMapper.Context" type="org.apache.hadoop.mapreduce.MapContext" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the input split for this map.]]> </doc> </method> <method name="getCurrentKey" return="KEYIN" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentValue" return="VALUEIN" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" 
type="java.lang.InterruptedException"/> </method> <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counterName" type="java.lang.Enum"/> </method> <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="groupName" type="java.lang.String"/> <param name="counterName" type="java.lang.String"/> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="KEYOUT"/> <param name="value" type="VALUEOUT"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getStatus" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="msg" type="java.lang.String"/> </method> <method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" 
final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getArchiveTimestamps" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getCacheArchives" return="java.net.URI[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getCacheFiles" return="java.net.URI[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getCombinerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getConfiguration" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFileTimestamps" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getInputFormatClass" return="java.lang.Class" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getJar" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobSetupCleanupNeeded" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getLocalCacheFiles" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getMapOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapperClass" 
return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getMaxMapAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMaxReduceAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNumReduceTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutputFormatClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPartitionerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getReducerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception 
name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getSortComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getSymlink" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="progress" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProfileEnabled" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProfileParams" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="isMap" type="boolean"/> </method> <method name="getUser" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getCredentials" return="org.apache.hadoop.security.Credentials" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <field 
name="mapContext" type="org.apache.hadoop.mapreduce.MapContext" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> </class> <!-- end class org.apache.hadoop.mapreduce.lib.map.WrappedMapper.Context --> </package> <package name="org.apache.hadoop.mapreduce.lib.output"> <!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter --> <class name="FileOutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create a file output committer @param outputPath the job's output path @param context the task's context @throws IOException]]> </doc> </constructor> <method name="setupJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Create the temporary directory that is the root of all of the task work directories. @param context the job's context]]> </doc> </method> <method name="commitJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Delete the temporary directory, including all of the work directories. Create a _SUCCESS file to mark it as successful. 
@param context the job's context]]> </doc> </method> <method name="cleanupJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="abortJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="state" type="org.apache.hadoop.mapreduce.JobStatus.State"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Delete the temporary directory, including all of the work directories. @param context the job's context]]> </doc> </method> <method name="setupTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[No task setup required.]]> </doc> </method> <method name="commitTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Move the files from the work directory to the job output directory @param context the task context]]> </doc> </method> <method name="abortTask" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Delete the work directory @throws IOException]]> </doc> </method> 
<method name="needsTaskCommit" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Did this task write any files in the work directory? @param context the task's context]]> </doc> </method> <method name="getWorkPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the directory that the task should write results into @return the work directory @throws IOException]]> </doc> </method> <field name="TEMP_DIR_NAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> <doc> <![CDATA[Temporary directory name]]> </doc> </field> <field name="SUCCEEDED_FILE_NAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[An {@link OutputCommitter} that commits files specified in job output directory i.e. 
${mapreduce.output.fileoutputformat.outputdir}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter --> <!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat --> <class name="FileOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FileOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setCompressOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="compress" type="boolean"/> <doc> <![CDATA[Set whether the output of the job is compressed. @param job the job to modify @param compress should the output of the job be compressed?]]> </doc> </method> <method name="getCompressOutput" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Is the job output compressed? @param job the Job to look in @return <code>true</code> if the job output should be compressed, <code>false</code> otherwise]]> </doc> </method> <method name="setOutputCompressorClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="codecClass" type="java.lang.Class"/> <doc> <![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs. 
@param job the job to modify @param codecClass the {@link CompressionCodec} to be used to compress the job outputs]]> </doc> </method> <method name="getOutputCompressorClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="defaultValue" type="java.lang.Class"/> <doc> <![CDATA[Get the {@link CompressionCodec} for compressing the job outputs. @param job the {@link Job} to look in @param defaultValue the {@link CompressionCodec} to return if not set @return the {@link CompressionCodec} to be used to compress the job outputs @throws IllegalArgumentException if the class was specified, but not found]]> </doc> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="FileAlreadyExistsException" type="org.apache.hadoop.fs.FileAlreadyExistsException"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setOutputPath" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="outputDir" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Set the {@link Path} of the output directory for the map-reduce 
job. @param job The job to modify @param outputDir the {@link Path} of the output directory for the map-reduce job.]]> </doc> </method> <method name="getOutputPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the {@link Path} to the output directory for the map-reduce job. @return the {@link Path} to the output directory for the map-reduce job. @see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)]]> </doc> </method> <method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Get the {@link Path} to the task's temporary output directory for the map-reduce job <h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4> <p>Some applications need to create/write-to side-files, which differ from the actual job-outputs. <p>In such cases there could be issues with 2 instances of the same TIP (running simultaneously e.g. speculative tasks) trying to open/write-to the same file (path) on HDFS. Hence the application-writer will have to pick unique names per task-attempt (e.g. using the attemptid, say <tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p> <p>To get around this the Map-Reduce framework helps the application-writer out by maintaining a special <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> sub-directory for each task-attempt on HDFS where the output of the task-attempt goes. 
On successful completion of the task-attempt the files in the <tt>${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid}</tt> (only) are <i>promoted</i> to <tt>${mapreduce.output.fileoutputformat.outputdir}</tt>. Of course, the framework discards the sub-directory of unsuccessful task-attempts. This is completely transparent to the application.</p> <p>The application-writer can take advantage of this by creating any side-files required in a work directory during execution of the task i.e. via {@link #getWorkOutputPath(TaskInputOutputContext)}, and the framework will move them out similarly - thus the application-writer doesn't have to pick unique paths per task-attempt.</p> <p>The entire discussion holds true for maps of jobs with reducer=NONE (i.e. 0 reduces) since output of the map, in that case, goes directly to HDFS.</p> @return the {@link Path} to the task's temporary output directory for the map-reduce job.]]> </doc> </method> <method name="getPathForWorkFile" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/> <param name="name" type="java.lang.String"/> <param name="extension" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Helper function to generate a {@link Path} for a file that is unique for the task within the job output directory. <p>The path can be used to create custom files from within the map and reduce tasks. The path name will be unique for each task. The path parent will be the job output directory.</p> <p>This method uses the {@link #getUniqueFile} method to make the file name unique for the task.</p> @param context the context for the task. @param name the name for the file. 
@param extension the extension for the file @return a unique path across all tasks of the job.]]> </doc> </method> <method name="getUniqueFile" return="java.lang.String" abstract="false" native="false" synchronized="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <param name="name" type="java.lang.String"/> <param name="extension" type="java.lang.String"/> <doc> <![CDATA[Generate a unique filename, based on the task id, name, and extension @param context the task that is calling this @param name the base filename @param extension the filename extension @return a string like $name-[mrsct]-$id$extension]]> </doc> </method> <method name="getDefaultWorkFile" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <param name="extension" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the default path and filename for the output format. 
@param context the task context @param extension an extension to add to the filename @return a full path $output/_temporary/$taskid/part-[mr]-$id @throws IOException]]> </doc> </method> <method name="getOutputName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the base output name for the output file.]]> </doc> </method> <method name="setOutputName" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Set the base output name for output file to be created.]]> </doc> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="BASE_OUTPUT_NAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="PART" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="COMPRESS" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="COMPRESS_CODEC" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="COMPRESS_TYPE" type="java.lang.String" 
transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="OUTDIR" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[A base class for {@link OutputFormat}s that write to {@link FileSystem}s.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat --> <class name="FilterOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="FilterOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="FilterOutputFormat" type="org.apache.hadoop.mapreduce.OutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a FilterOutputFormat based on the underlying output format. 
@param baseOut the underlying OutputFormat]]> </doc> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <field name="baseOut" type="org.apache.hadoop.mapreduce.OutputFormat" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[FilterOutputFormat is a convenience class that wraps OutputFormat.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat.FilterRecordWriter --> <class name="FilterOutputFormat.FilterRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> 
<constructor name="FilterOutputFormat.FilterRecordWriter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="FilterOutputFormat.FilterRecordWriter" type="org.apache.hadoop.mapreduce.RecordWriter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <field name="rawWriter" type="org.apache.hadoop.mapreduce.RecordWriter" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[<code>FilterRecordWriter</code> is a convenience wrapper class that extends the {@link RecordWriter}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat.FilterRecordWriter --> <!-- start class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat --> <class name="LazyOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FilterOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="LazyOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setOutputFormatClass" abstract="false" native="false" 
synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the underlying output format for LazyOutputFormat. @param job the {@link Job} to modify @param theClass the underlying class]]> </doc> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <field name="OUTPUT_FORMAT" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[A Convenience class that creates output lazily.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat --> <!-- start class 
org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat --> <class name="MapFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MapFileOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="dir" type="org.apache.hadoop.fs.Path"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Open the output generated by this format.]]> </doc> </method> <method name="getEntry" return="org.apache.hadoop.io.Writable" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/> <param name="partitioner" type="org.apache.hadoop.mapreduce.Partitioner"/> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get an entry from output generated by this class.]]> </doc> </method> <doc> <![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes {@link MapFile}s.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat --> <!-- start class 
org.apache.hadoop.mapreduce.lib.output.MultipleOutputs --> <class name="MultipleOutputs" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MultipleOutputs" type="org.apache.hadoop.mapreduce.TaskInputOutputContext" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Creates and initializes multiple outputs support; it should be instantiated in the Mapper/Reducer setup method. @param context the TaskInputOutputContext object]]> </doc> </constructor> <method name="addNamedOutput" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="namedOutput" type="java.lang.String"/> <param name="outputFormatClass" type="java.lang.Class"/> <param name="keyClass" type="java.lang.Class"/> <param name="valueClass" type="java.lang.Class"/> <doc> <![CDATA[Adds a named output for the job. <p/> @param job job to add the named output @param namedOutput named output name, it has to be a word, letters and numbers only, cannot be the word 'part' as that is reserved for the default output. @param outputFormatClass OutputFormat class. @param keyClass key class @param valueClass value class]]> </doc> </method> <method name="setCountersEnabled" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="enabled" type="boolean"/> <doc> <![CDATA[Enables or disables counters for the named outputs. The counters group is the {@link MultipleOutputs} class name. The names of the counters are the same as the named outputs. These counters count the number of records written to each output name. By default these counters are disabled. 
@param job job to enable counters @param enabled indicates if the counters will be enabled or not.]]> </doc> </method> <method name="getCountersEnabled" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Returns if the counters for the named outputs are enabled or not. By default these counters are disabled. @param job the job @return TRUE if the counters are enabled, FALSE if they are disabled.]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="namedOutput" type="java.lang.String"/> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Write key and value to the namedOutput. Output path is a unique file generated for the namedOutput. For example, {namedOutput}-(m|r)-{part-number} @param namedOutput the named output name @param key the key @param value the value]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="namedOutput" type="java.lang.String"/> <param name="key" type="K"/> <param name="value" type="V"/> <param name="baseOutputPath" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Write key and value to baseOutputPath using the namedOutput. @param namedOutput the named output name @param key the key @param value the value @param baseOutputPath base-output path to write the record to. 
Note: Framework will generate unique filename for the baseOutputPath]]> </doc> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="KEYOUT"/> <param name="value" type="VALUEOUT"/> <param name="baseOutputPath" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Write key value to an output file name. Gets the record writer from job's output format. Job's output format should be a FileOutputFormat. @param key the key @param value the value @param baseOutputPath base-output path to write the record to. Note: Framework will generate unique filename for the baseOutputPath]]> </doc> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Closes all the opened outputs. This should be called from cleanup method of map/reduce task. If overridden subclasses must invoke <code>super.close()</code> at the end of their <code>close()</code>]]> </doc> </method> <doc> <![CDATA[The MultipleOutputs class simplifies writing output data to multiple outputs <p> Case one: writing to additional outputs other than the job default output. Each additional output, or named output, may be configured with its own <code>OutputFormat</code>, with its own key class and with its own value class. <p> Case two: to write data to different files provided by user </p> <p> MultipleOutputs supports counters, by default they are disabled. The counters group is the {@link MultipleOutputs} class name. The names of the counters are the same as the output name. 
These count the number of records written to each output name. </p> Usage pattern for job submission: <pre> Job job = new Job(); FileInputFormat.setInputPath(job, inDir); FileOutputFormat.setOutputPath(job, outDir); job.setMapperClass(MOMap.class); job.setReducerClass(MOReduce.class); ... // Defines additional single text based output 'text' for the job MultipleOutputs.addNamedOutput(job, "text", TextOutputFormat.class, LongWritable.class, Text.class); // Defines additional sequence-file based output 'seq' for the job MultipleOutputs.addNamedOutput(job, "seq", SequenceFileOutputFormat.class, LongWritable.class, Text.class); ... job.waitForCompletion(true); ... </pre> <p> Usage in Reducer: <pre> <K, V> String generateFileName(K k, V v) { return k.toString() + "_" + v.toString(); } public class MOReduce extends Reducer<WritableComparable, Writable,WritableComparable, Writable> { private MultipleOutputs mos; public void setup(Context context) { ... mos = new MultipleOutputs(context); } public void reduce(WritableComparable key, Iterator<Writable> values, Context context) throws IOException { ... mos.write("text", key, new Text("Hello")); mos.write("seq", new LongWritable(1), new Text("Bye"), "seq_a"); mos.write("seq", new LongWritable(2), new Text("Chau"), "seq_b"); mos.write(key, new Text("value"), generateFileName(key, new Text("value"))); ... } public void cleanup(Context) throws IOException { mos.close(); ... 
} } </pre>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.MultipleOutputs --> <!-- start class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat --> <class name="NullOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="NullOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.JobContext"/> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> </method> <doc> <![CDATA[Consume all outputs and put them in /dev/null.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat --> <class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileAsBinaryOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setSequenceFileOutputKeyClass" 
abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the key class for the {@link SequenceFile} <p>This allows the user to specify the key class to be different from the actual class ({@link BytesWritable}) used for writing </p> @param job the {@link Job} to modify @param theClass the SequenceFile output key class.]]> </doc> </method> <method name="setSequenceFileOutputValueClass" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="theClass" type="java.lang.Class"/> <doc> <![CDATA[Set the value class for the {@link SequenceFile} <p>This allows the user to specify the value class to be different from the actual class ({@link BytesWritable}) used for writing </p> @param job the {@link Job} to modify @param theClass the SequenceFile output value class.]]> </doc> </method> <method name="getSequenceFileOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the key class for the {@link SequenceFile} @return the key class of the {@link SequenceFile}]]> </doc> </method> <method name="getSequenceFileOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the value class for the {@link SequenceFile} @return the value class of the {@link SequenceFile}]]> </doc> </method> <method name="getRecordWriter" 
return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="checkOutputSpecs" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="KEY_CLASS" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <field name="VALUE_CLASS" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[An {@link org.apache.hadoop.mapreduce.OutputFormat} that writes keys, values to {@link SequenceFile}s in binary(raw) format]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat.WritableValueBytes --> <class name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.io.SequenceFile.ValueBytes"/> <constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" type="org.apache.hadoop.io.BytesWritable" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="reset" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="value" type="org.apache.hadoop.io.BytesWritable"/> </method> <method name="writeUncompressedBytes" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="outStream" type="java.io.DataOutputStream"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="writeCompressedBytes" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="outStream" type="java.io.DataOutputStream"/> <exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getSize" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[Inner class used for appendRaw]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileAsBinaryOutputFormat.WritableValueBytes --> <!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat --> <class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="SequenceFileOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getSequenceWriter" return="org.apache.hadoop.io.SequenceFile.Writer" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <param name="keyClass" type="java.lang.Class"/> <param 
name="valueClass" type="java.lang.Class"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}. @param job the {@link Job} @return the {@link CompressionType} for the output {@link SequenceFile}, defaulting to {@link CompressionType#RECORD}]]> </doc> </method> <method name="setOutputCompressionType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/> <doc> <![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}. 
@param job the {@link Job} to modify @param style the {@link CompressionType} for the output {@link SequenceFile}]]> </doc> </method> <doc> <![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat --> <class name="TextOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TextOutputFormat" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <field name="SEPERATOR" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[An {@link OutputFormat} that writes plain text files.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat --> <!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter --> <class name="TextOutputFormat.LineRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter" abstract="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor 
name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="write" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/> <exception name="IOException" type="java.io.IOException"/> </method> <field name="out" type="java.io.DataOutputStream" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> </class> <!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter --> </package> <package name="org.apache.hadoop.mapreduce.lib.partition"> <!-- start class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner --> <class name="BinaryPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="BinaryPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setOffsets" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="left" type="int"/> <param name="right" type="int"/> <doc> <![CDATA[Set the subarray to be used for partitioning to <code>bytes[left:(right+1)]</code> in Python syntax. 
@param conf configuration object @param left left Python-style offset @param right right Python-style offset]]> </doc> </method> <method name="setLeftOffset" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="offset" type="int"/> <doc> <![CDATA[Set the subarray to be used for partitioning to <code>bytes[offset:]</code> in Python syntax. @param conf configuration object @param offset left Python-style offset]]> </doc> </method> <method name="setRightOffset" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="offset" type="int"/> <doc> <![CDATA[Set the subarray to be used for partitioning to <code>bytes[:(offset+1)]</code> in Python syntax. @param conf configuration object @param offset right Python-style offset]]> </doc> </method> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPartition" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="org.apache.hadoop.io.BinaryComparable"/> <param name="value" type="V"/> <param name="numPartitions" type="int"/> <doc> <![CDATA[Use (the specified slice of the array returned by) {@link BinaryComparable#getBytes()} to partition.]]> </doc> </method> <field name="LEFT_OFFSET_PROPERTY_NAME" 
type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="RIGHT_OFFSET_PROPERTY_NAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[<p>Partition {@link BinaryComparable} keys using a configurable part of the bytes array returned by {@link BinaryComparable#getBytes()}.</p> <p>The subarray to be used for the partitioning can be defined by means of the following properties: <ul> <li> <i>mapreduce.partition.binarypartitioner.left.offset</i>: left offset in array (0 by default) </li> <li> <i>mapreduce.partition.binarypartitioner.right.offset</i>: right offset in array (-1 by default) </li> </ul> Like in Python, both negative and positive offsets are allowed, but the meaning is slightly different. In case of an array of length 5, for instance, the possible offsets are: <pre><code> +---+---+---+---+---+ | B | B | B | B | B | +---+---+---+---+---+ 0 1 2 3 4 -5 -4 -3 -2 -1 </code></pre> The first row of numbers gives the position of the offsets 0...4 in the array; the second row gives the corresponding negative offsets. Contrary to Python, the specified subarray has byte <code>i</code> and <code>j</code> as first and last element, respectively, when <code>i</code> and <code>j</code> are the left and right offset. 
<p>For Hadoop programs written in Java, it is advisable to use one of the following static convenience methods for setting the offsets: <ul> <li>{@link #setOffsets}</li> <li>{@link #setLeftOffset}</li> <li>{@link #setRightOffset}</li> </ul></p>]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.BinaryPartitioner --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner --> <class name="HashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="HashPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getPartition" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <param name="numReduceTasks" type="int"/> <doc> <![CDATA[Use {@link Object#hashCode()} to partition.]]> </doc> </method> <doc> <![CDATA[Partition keys by their {@link Object#hashCode()}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler --> <class name="InputSampler" extends="org.apache.hadoop.conf.Configured" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="InputSampler" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="writePartitionFile" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="sampler" 
type="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/> <exception name="IOException" type="java.io.IOException"/> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Write a partition file for the given job, using the Sampler provided. Queries the sampler for a sample keyset, sorts by the output key comparator, selects the keys for each rank, and writes to the destination returned from {@link TotalOrderPartitioner#getPartitionFile}.]]> </doc> </method> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> <doc> <![CDATA[Driver for InputSampler from the command line. Configures a JobConf instance and calls {@link #writePartitionFile}.]]> </doc> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <doc> <![CDATA[Utility for collecting samples and writing a partition file for {@link TotalOrderPartitioner}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler.IntervalSampler --> <class name="InputSampler.IntervalSampler" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/> <constructor name="InputSampler.IntervalSampler" type="double" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a new 
IntervalSampler sampling <em>all</em> splits. @param freq The frequency with which records will be emitted.]]> </doc> </constructor> <constructor name="InputSampler.IntervalSampler" type="double, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a new IntervalSampler. @param freq The frequency with which records will be emitted. @param maxSplitsSampled The maximum number of splits to examine. @see #getSample]]> </doc> </constructor> <method name="getSample" return="K[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[For each split sampled, emit when the ratio of the number of records retained to the total record count is less than the specified frequency.]]> </doc> </method> <doc> <![CDATA[Sample from s splits at regular intervals. Useful for sorted data.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler.IntervalSampler --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler.RandomSampler --> <class name="InputSampler.RandomSampler" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/> <constructor name="InputSampler.RandomSampler" type="double, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a new RandomSampler sampling <em>all</em> splits. This will read every split at the client, which is very expensive. @param freq Probability with which a key will be chosen. 
@param numSamples Total number of samples to obtain from all selected splits.]]> </doc> </constructor> <constructor name="InputSampler.RandomSampler" type="double, int, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a new RandomSampler. @param freq Probability with which a key will be chosen. @param numSamples Total number of samples to obtain from all selected splits. @param maxSplitsSampled The maximum number of splits to examine.]]> </doc> </constructor> <method name="getSample" return="K[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Randomize the split order, then take the specified number of keys from each split sampled, where each key is selected with the specified probability and possibly replaced by a subsequently selected key when the quota of keys from that split is satisfied.]]> </doc> </method> <doc> <![CDATA[Sample from random points in the input. General-purpose sampler. 
Takes numSamples / maxSplitsSampled inputs from each split.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler.RandomSampler --> <!-- start interface org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler --> <interface name="InputSampler.Sampler" abstract="true" static="true" final="false" visibility="public" deprecated="not deprecated"> <method name="getSample" return="K[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[For a given job, collect and return a subset of the keys from the input data.]]> </doc> </method> <doc> <![CDATA[Interface to sample using an {@link org.apache.hadoop.mapreduce.InputFormat}.]]> </doc> </interface> <!-- end interface org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.InputSampler.SplitSampler --> <class name="InputSampler.SplitSampler" extends="java.lang.Object" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapreduce.lib.partition.InputSampler.Sampler"/> <constructor name="InputSampler.SplitSampler" type="int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a SplitSampler sampling <em>all</em> splits. Takes the first numSamples / numSplits records from each split. 
@param numSamples Total number of samples to obtain from all selected splits.]]> </doc> </constructor> <constructor name="InputSampler.SplitSampler" type="int, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Create a new SplitSampler. @param numSamples Total number of samples to obtain from all selected splits. @param maxSplitsSampled The maximum number of splits to examine.]]> </doc> </constructor> <method name="getSample" return="K[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="inf" type="org.apache.hadoop.mapreduce.InputFormat"/> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[From each split sampled, take the first numSamples / numSplits records.]]> </doc> </method> <doc> <![CDATA[Samples the first n records from s splits. 
Inexpensive way to sample random data.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.InputSampler.SplitSampler --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator --> <class name="KeyFieldBasedComparator" extends="org.apache.hadoop.io.WritableComparator" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="KeyFieldBasedComparator" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="compare" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="b1" type="byte[]"/> <param name="s1" type="int"/> <param name="l1" type="int"/> <param name="b2" type="byte[]"/> <param name="s2" type="int"/> <param name="l2" type="int"/> </method> <method name="setKeyFieldComparatorOptions" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="keySpec" type="java.lang.String"/> <doc> <![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys. 
@param keySpec the key specification of the form -k pos1[,pos2], where, pos is of the form f[.c][opts], where f is the number of the key field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field). opts are ordering options. The supported options are: -n, (Sort numerically) -r, (Reverse the result of comparison)]]> </doc> </method> <method name="getKeyFieldComparatorOption" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the {@link KeyFieldBasedComparator} options]]> </doc> </method> <field name="COMPARATOR_OPTIONS" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This comparator implementation provides a subset of the features provided by the Unix/GNU Sort. In particular, the supported features are: -n, (Sort numerically) -r, (Reverse the result of comparison) -k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number of the field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field). opts are ordering options (any of 'nr' as described above). 
We assume that the fields in the key are separated by {@link JobContext#MAP_OUTPUT_KEY_FIELD_SEPERATOR}.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedComparator --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner --> <class name="KeyFieldBasedPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="KeyFieldBasedPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPartition" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K2"/> <param name="value" type="V2"/> <param name="numReduceTasks" type="int"/> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="b" type="byte[]"/> <param name="start" type="int"/> <param name="end" type="int"/> <param name="currentHash" type="int"/> </method> <method name="getPartition" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="hash" type="int"/> <param name="numReduceTasks" type="int"/> </method> <method 
name="setKeyFieldPartitionerOptions" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="keySpec" type="java.lang.String"/> <doc> <![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for {@link Partitioner} @param keySpec the key specification of the form -k pos1[,pos2], where, pos is of the form f[.c][opts], where f is the number of the key field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field).]]> </doc> </method> <method name="getKeyFieldPartitionerOption" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.JobContext"/> <doc> <![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]> </doc> </method> <field name="PARTITIONER_OPTIONS" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[Defines a way to partition keys based on certain key fields (also see {@link KeyFieldBasedComparator}). The key specification supported is of the form -k pos1[,pos2], where, pos is of the form f[.c][opts], where f is the number of the key field to use, and c is the number of the first character from the beginning of the field. Fields and character posns are numbered starting with 1; a character position of zero in pos2 indicates the field's last character. 
If '.c' is omitted from pos1, it defaults to 1 (the beginning of the field); if omitted from pos2, it defaults to 0 (the end of the field).]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.KeyFieldBasedPartitioner --> <!-- start class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner --> <class name="TotalOrderPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="TotalOrderPartitioner" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Read in the partition file and build indexing data structures. If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and <tt>total.order.partitioner.natural.order</tt> is not false, a trie of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes will be built. Otherwise, keys will be located using a binary search of the partition keyset using the {@link org.apache.hadoop.io.RawComparator} defined for this job. 
The input file must be sorted with the same comparator and contain {@link Job#getNumReduceTasks()} - 1 keys.]]> </doc> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPartition" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="V"/> <param name="numPartitions" type="int"/> </method> <method name="setPartitionFile" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="p" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[Set the path to the SequenceFile storing the sorted partition keyset. It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt> keys in the SequenceFile.]]> </doc> </method> <method name="getPartitionFile" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <doc> <![CDATA[Get the path to the SequenceFile storing the sorted partition keyset. 
@see #setPartitionFile(Configuration, Path)]]> </doc> </method> <field name="DEFAULT_PATH" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="PARTITIONER_PATH" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="MAX_TRIE_DEPTH" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <field name="NATURAL_ORDER" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[Partitioner effecting a total order by reading split points from an externally generated source.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner --> </package> <package name="org.apache.hadoop.mapreduce.lib.reduce"> <!-- start class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer --> <class name="IntSumReducer" extends="org.apache.hadoop.mapreduce.Reducer" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="IntSumReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="Key"/> <param name="values" type="java.lang.Iterable"/> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer --> <!-- start class 
org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer --> <class name="LongSumReducer" extends="org.apache.hadoop.mapreduce.Reducer" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="LongSumReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="reduce" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="KEY"/> <param name="values" type="java.lang.Iterable"/> <param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> </class> <!-- end class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer --> <!-- start class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer --> <class name="WrappedReducer" extends="org.apache.hadoop.mapreduce.Reducer" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="WrappedReducer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getReducerContext" return="org.apache.hadoop.mapreduce.Reducer.Context" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="reduceContext" type="org.apache.hadoop.mapreduce.ReduceContext"/> <doc> <![CDATA[Get a wrapped {@link Reducer.Context} for custom implementations. 
@param reduceContext <code>ReduceContext</code> to be wrapped @return a wrapped <code>Reducer.Context</code> for custom implementations]]> </doc> </method> <doc> <![CDATA[A {@link Reducer} which wraps a given one to allow for custom {@link Reducer.Context} implementations.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer --> <!-- start class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer.Context --> <class name="WrappedReducer.Context" extends="org.apache.hadoop.mapreduce.Reducer.Context" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="WrappedReducer.Context" type="org.apache.hadoop.mapreduce.ReduceContext" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getCurrentKey" return="KEYIN" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCurrentValue" return="VALUEIN" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="nextKeyValue" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> 
<param name="counterName" type="java.lang.Enum"/> </method> <method name="getCounter" return="org.apache.hadoop.mapreduce.Counter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="groupName" type="java.lang.String"/> <param name="counterName" type="java.lang.String"/> </method> <method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="KEYOUT"/> <param name="value" type="VALUEOUT"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getStatus" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="msg" type="java.lang.String"/> </method> <method name="getArchiveClassPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getArchiveTimestamps" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not 
deprecated"> </method> <method name="getCacheArchives" return="java.net.URI[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getCacheFiles" return="java.net.URI[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getCombinerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getConfiguration" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFileClassPaths" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFileTimestamps" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getInputFormatClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getJar" return="java.lang.String" 
abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobSetupCleanupNeeded" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getLocalCacheArchives" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getLocalCacheFiles" return="org.apache.hadoop.fs.Path[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getMapOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapperClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method 
name="getMaxMapAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMaxReduceAttempts" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNumReduceTasks" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutputFormatClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getOutputKeyClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutputValueClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPartitionerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getReducerClass" return="java.lang.Class" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/> </method> <method name="getSortComparator" return="org.apache.hadoop.io.RawComparator" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not 
deprecated"> </method> <method name="getSymlink" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="progress" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getValues" return="java.lang.Iterable" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="nextKey" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="getProfileEnabled" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProfileParams" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="isMap" type="boolean"/> </method> <method name="getUser" 
return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getCredentials" return="org.apache.hadoop.security.Credentials" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <field name="reduceContext" type="org.apache.hadoop.mapreduce.ReduceContext" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> </class> <!-- end class org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer.Context --> </package> <package name="org.apache.hadoop.mapreduce.security"> <!-- start class org.apache.hadoop.mapreduce.security.TokenCache --> <class name="TokenCache" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TokenCache" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getSecretKey" return="byte[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="credentials" type="org.apache.hadoop.security.Credentials"/> <param name="alias" type="org.apache.hadoop.io.Text"/> <doc> <![CDATA[auxiliary method to get user's secret keys. 
@param alias @return secret key from the storage]]> </doc> </method> <method name="obtainTokensForNamenodes" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="credentials" type="org.apache.hadoop.security.Credentials"/> <param name="ps" type="org.apache.hadoop.fs.Path[]"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Convenience method to obtain delegation tokens from namenodes corresponding to the paths passed. @param credentials @param ps array of paths @param conf configuration @throws IOException]]> </doc> </method> <method name="getDelegationToken" return="org.apache.hadoop.security.token.Token" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="credentials" type="org.apache.hadoop.security.Credentials"/> <param name="namenode" type="java.lang.String"/> <doc> <![CDATA[@param namenode @return delegation token]]> </doc> </method> <method name="loadTokens" return="org.apache.hadoop.security.Credentials" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="jobTokenFile" type="java.lang.String"/> <param name="conf" type="org.apache.hadoop.mapred.JobConf"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[load job token from a file @param conf @throws IOException]]> </doc> </method> <method name="setJobToken" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="t" type="org.apache.hadoop.security.token.Token"/> <param name="credentials" type="org.apache.hadoop.security.Credentials"/> <doc> <![CDATA[store job token @param t]]> </doc> </method> <method name="getJobToken" 
return="org.apache.hadoop.security.token.Token" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="credentials" type="org.apache.hadoop.security.Credentials"/> <doc> <![CDATA[@return job token]]> </doc> </method> <field name="JOB_TOKEN_HDFS_FILE" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[file name used on HDFS for generated job token]]> </doc> </field> <field name="JOB_TOKENS_FILENAME" type="java.lang.String" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[conf setting for job tokens cache file name]]> </doc> </field> <doc> <![CDATA[This class provides user facing APIs for transferring secrets from the job client to the tasks. The secrets can be stored just before submission of jobs and read during the task execution.]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.security.TokenCache --> </package> <package name="org.apache.hadoop.mapreduce.server.jobtracker"> <!-- start class org.apache.hadoop.mapreduce.server.jobtracker.State --> <class name="State" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="Use {@link JobTrackerStatus} instead."> <method name="values" return="org.apache.hadoop.mapreduce.server.jobtracker.State[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.mapreduce.server.jobtracker.State" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <doc> <![CDATA[Describes the state of JobTracker @deprecated Use {@link JobTrackerStatus} instead.]]> 
</doc> </class> <!-- end class org.apache.hadoop.mapreduce.server.jobtracker.State --> </package> <package name="org.apache.hadoop.mapreduce.tools"> <!-- start class org.apache.hadoop.mapreduce.tools.CLI --> <class name="CLI" extends="org.apache.hadoop.conf.Configured" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="CLI" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="CLI" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="argv" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <method name="getCounter" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="counters" type="org.apache.hadoop.mapreduce.Counters"/> <param name="counterGroupName" type="java.lang.String"/> <param name="counterName" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getTaskLogURL" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="protected" deprecated="not deprecated"> <param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/> <param name="baseUrl" type="java.lang.String"/> </method> <method name="displayTasks" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapreduce.Job"/> <param name="type" type="java.lang.String"/> <param name="state" 
type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> <doc> <![CDATA[Display the information about a job's tasks, of a particular type and in a particular state @param job the job @param type the type of the task (map/reduce/setup/cleanup) @param state the state of the task (pending/running/completed/failed/killed)]]> </doc> </method> <method name="displayJobList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="jobs" type="org.apache.hadoop.mapreduce.Job[]"/> <exception name="IOException" type="java.io.IOException"/> <exception name="InterruptedException" type="java.lang.InterruptedException"/> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="argv" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <doc> <![CDATA[Interprets the map reduce cli options]]> </doc> </class> <!-- end class org.apache.hadoop.mapreduce.tools.CLI --> </package> <package name="org.apache.hadoop.fs"> <!-- start class org.apache.hadoop.fs.HarFileSystem --> <class name="HarFileSystem" extends="org.apache.hadoop.fs.FilterFileSystem" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="HarFileSystem" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[public construction of harfilesystem]]> </doc> </constructor> <constructor name="HarFileSystem" type="org.apache.hadoop.fs.FileSystem" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor to create a HarFileSystem with an underlying filesystem. 
@param fs]]> </doc> </constructor> <method name="initialize" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.net.URI"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Initialize a Har filesystem per har archive. The archive home directory is the top level directory in the filesystem that contains the HAR archive. Be careful with this method, you do not want to go on creating new Filesystem instances per call to path.getFileSystem(). the uri of Har is har://underlyingfsscheme-host:port/archivepath. or har:///archivepath. This assumes the underlying filesystem to be used in case not specified.]]> </doc> </method> <method name="getHarVersion" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[return the top level archive.]]> </doc> </method> <method name="getUri" return="java.net.URI" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Returns the uri of this filesystem. 
The uri is of the form har://underlyingfsscheme-host:port/pathintheunderlyingfs]]> </doc> </method> <method name="makeQualified" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> </method> <method name="getFileBlockLocations" return="org.apache.hadoop.fs.BlockLocation[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="file" type="org.apache.hadoop.fs.FileStatus"/> <param name="start" type="long"/> <param name="len" type="long"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[get block locations from the underlying fs @param file the input filestatus to get block locations @param start the start in the file @param len the length in the file @return block locations for this segment of file @throws IOException]]> </doc> </method> <method name="getHarHash" return="int" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="p" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[the hash of the path p inside the filesystem @param p the path in the harfilesystem @return the hash code of the path.]]> </doc> </method> <method name="getFileStatus" return="org.apache.hadoop.fs.FileStatus" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[return the filestatus of files in har archive. The permissions returned are those of the archive index files. The permissions are not persisted while creating a hadoop archive. @param f the path in har filesystem @return filestatus. 
@throws IOException]]> </doc> </method> <method name="getFileChecksum" return="org.apache.hadoop.fs.FileChecksum" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" type="org.apache.hadoop.fs.Path"/> <doc> <![CDATA[@return null since no checksum algorithm is implemented.]]> </doc> </method> <method name="open" return="org.apache.hadoop.fs.FSDataInputStream" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" type="org.apache.hadoop.fs.Path"/> <param name="bufferSize" type="int"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Returns a har input stream which fakes end of file. It reads the index files to get the part file name and the size and start of the file.]]> </doc> </method> <method name="create" return="org.apache.hadoop.fs.FSDataOutputStream" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" type="org.apache.hadoop.fs.Path"/> <param name="bufferSize" type="int"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="create" return="org.apache.hadoop.fs.FSDataOutputStream" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" type="org.apache.hadoop.fs.Path"/> <param name="permission" type="org.apache.hadoop.fs.permission.FsPermission"/> <param name="flag" type="java.util.EnumSet"/> <param name="bufferSize" type="int"/> <param name="replication" type="short"/> <param name="blockSize" type="long"/> <param name="progress" type="org.apache.hadoop.util.Progressable"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" 
final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="setReplication" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="src" type="org.apache.hadoop.fs.Path"/> <param name="replication" type="short"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Not implemented.]]> </doc> </method> <method name="delete" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" type="org.apache.hadoop.fs.Path"/> <param name="recursive" type="boolean"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Not implemented.]]> </doc> </method> <method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[liststatus returns the children of a directory after looking up the index files.]]> </doc> </method> <method name="getHomeDirectory" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[return the top level archive path.]]> </doc> </method> <method name="setWorkingDirectory" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="newDir" type="org.apache.hadoop.fs.Path"/> </method> <method name="mkdirs" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="f" 
type="org.apache.hadoop.fs.Path"/> <param name="permission" type="org.apache.hadoop.fs.permission.FsPermission"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[not implemented.]]> </doc> </method> <method name="copyFromLocalFile" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="delSrc" type="boolean"/> <param name="src" type="org.apache.hadoop.fs.Path"/> <param name="dst" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[not implemented.]]> </doc> </method> <method name="copyToLocalFile" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="delSrc" type="boolean"/> <param name="src" type="org.apache.hadoop.fs.Path"/> <param name="dst" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[copies the file in the har filesystem to a local file.]]> </doc> </method> <method name="startLocalOutput" return="org.apache.hadoop.fs.Path" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="fsOutputFile" type="org.apache.hadoop.fs.Path"/> <param name="tmpLocalFile" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[not implemented.]]> </doc> </method> <method name="completeLocalOutput" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="fsOutputFile" type="org.apache.hadoop.fs.Path"/> <param name="tmpLocalFile" type="org.apache.hadoop.fs.Path"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[not implemented.]]> </doc> </method> <method name="setOwner" abstract="false" native="false" 
synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="p" type="org.apache.hadoop.fs.Path"/> <param name="username" type="java.lang.String"/> <param name="groupname" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[not implemented.]]> </doc> </method> <method name="setPermission" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="p" type="org.apache.hadoop.fs.Path"/> <param name="permisssion" type="org.apache.hadoop.fs.permission.FsPermission"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Not implemented.]]> </doc> </method> <field name="VERSION" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[This is an implementation of the Hadoop Archive Filesystem. This archive Filesystem has index files of the form _index* and has contents of the form part-*. The index files store the indexes of the real files. The index files are of the form _masterindex and _index. The master index is a level of indirection in to the index file to make the look ups faster. 
the index file is sorted with hash code of the paths that it contains and the master index contains pointers to the positions in index for ranges of hashcodes.]]> </doc> </class> <!-- end class org.apache.hadoop.fs.HarFileSystem --> </package> <package name="org.apache.hadoop.tools"> <!-- start class org.apache.hadoop.tools.DistCh --> <class name="DistCh" extends="org.apache.hadoop.tools.DistTool" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> <doc> <![CDATA[This is the main driver for recursively changing files properties.]]> </doc> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <doc> <![CDATA[A Map-reduce program to recursively change files properties such as owner, group and permission.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.DistCh --> <!-- start class org.apache.hadoop.tools.DistCp --> <class name="DistCp" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="DistCp" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getConf" 
return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="copy" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <param name="srcPath" type="java.lang.String"/> <param name="destPath" type="java.lang.String"/> <param name="logPath" type="org.apache.hadoop.fs.Path"/> <param name="srcAsList" type="boolean"/> <param name="ignoreReadFailures" type="boolean"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <doc> <![CDATA[This is the main driver for recursively copying directories across file systems. It takes at least two cmdline parameters. A source URL and a destination URL. It then essentially does an "ls -lR" on the source URL, and writes the output in a round-robin manner to all the map input files. The mapper actually copies the files allotted to it. 
The reduce is empty.]]> </doc> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <method name="getRandomId" return="java.lang.String" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <field name="LOG" type="org.apache.commons.logging.Log" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[A Map-reduce program to recursively copy directories between different file-systems.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.DistCp --> <!-- start class org.apache.hadoop.tools.DistCp.DuplicationException --> <class name="DistCp.DuplicationException" extends="java.io.IOException" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <field name="ERROR_CODE" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Error code for this exception]]> </doc> </field> <doc> <![CDATA[An exception class for duplicated source files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.DistCp.DuplicationException --> <!-- start class org.apache.hadoop.tools.HadoopArchives --> <class name="HadoopArchives" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="HadoopArchives" type="org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" 
visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> <doc> <![CDATA[The main driver for creating the archives. It takes at least three command line parameters: the parent path, the src and the dest. It does an lsr on the source paths. The mapper creates the archives and the reducer creates the archive index.]]> </doc> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <doc> <![CDATA[the main functions]]> </doc> </method> <field name="VERSION" type="int" transient="false" volatile="false" static="true" final="true" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[An archive creation utility. This class provides methods that can be used to create hadoop archives. 
For understanding of Hadoop archives look at {@link HarFileSystem}.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.HadoopArchives --> <!-- start class org.apache.hadoop.tools.Logalyzer --> <class name="Logalyzer" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Logalyzer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="doArchive" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="logListURI" type="java.lang.String"/> <param name="archiveDirectory" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[doArchive: Workhorse function to archive log-files. @param logListURI : The uri which will serve list of log-files to archive. @param archiveDirectory : The directory to store archived logfiles. @throws IOException]]> </doc> </method> <method name="doAnalyze" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="inputFilesDirectory" type="java.lang.String"/> <param name="outputDirectory" type="java.lang.String"/> <param name="grepPattern" type="java.lang.String"/> <param name="sortColumns" type="java.lang.String"/> <param name="columnSeparator" type="java.lang.String"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[doAnalyze: @param inputFilesDirectory : Directory containing the files to be analyzed. @param outputDirectory : Directory to store analysis (output). @param grepPattern : Pattern to *grep* for. @param sortColumns : Sort specification for output. @param columnSeparator : Column separator. 
@throws IOException]]> </doc> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> </method> <field name="SORT_COLUMNS" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <field name="COLUMN_SEPARATOR" type="java.lang.String" transient="false" volatile="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </field> <doc> <![CDATA[Logalyzer: A utility tool for archiving and analyzing hadoop logs. <p> This tool supports archiving and analyzing (sort/grep) of log-files. It takes as input a) Input uri which will serve uris of the logs to be archived. b) Output directory (not mandatory). c) Directory on dfs to archive the logs. d) The sort/grep patterns for analyzing the files and separator for boundaries. Usage: Logalyzer -archive -archiveDir <directory to archive logs> -analysis <directory> -logs <log-list uri> -grep <pattern> -sort <col1, col2> -separator <separator> <p>]]> </doc> </class> <!-- end class org.apache.hadoop.tools.Logalyzer --> <!-- start class org.apache.hadoop.tools.Logalyzer.LogComparator --> <class name="Logalyzer.LogComparator" extends="org.apache.hadoop.io.Text.Comparator" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.conf.Configurable"/> <constructor name="Logalyzer.LogComparator" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> </method> <method name="getConf" return="org.apache.hadoop.conf.Configuration" abstract="false" 
native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="compare" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="b1" type="byte[]"/> <param name="s1" type="int"/> <param name="l1" type="int"/> <param name="b2" type="byte[]"/> <param name="s2" type="int"/> <param name="l2" type="int"/> </method> <doc> <![CDATA[A WritableComparator optimized for UTF8 keys of the logs.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.Logalyzer.LogComparator --> <!-- start class org.apache.hadoop.tools.Logalyzer.LogRegexMapper --> <class name="Logalyzer.LogRegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.mapred.Mapper"/> <constructor name="Logalyzer.LogRegexMapper" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="configure" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="job" type="org.apache.hadoop.mapred.JobConf"/> </method> <method name="map" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="key" type="K"/> <param name="value" type="org.apache.hadoop.io.Text"/> <param name="output" type="org.apache.hadoop.mapred.OutputCollector"/> <param name="reporter" type="org.apache.hadoop.mapred.Reporter"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.Logalyzer.LogRegexMapper --> </package> <package name="org.apache.hadoop.tools.rumen"> <!-- start class 
org.apache.hadoop.tools.rumen.AbstractClusterStory --> <class name="AbstractClusterStory" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.ClusterStory"/> <constructor name="AbstractClusterStory" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getMachines" return="java.util.Set" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getRacks" return="java.util.Set" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getRandomMachines" return="org.apache.hadoop.tools.rumen.MachineNode[]" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="expected" type="int"/> <param name="random" type="java.util.Random"/> </method> <method name="buildMachineNodeMap" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <method name="getMachineByName" return="org.apache.hadoop.tools.rumen.MachineNode" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <method name="distance" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="a" type="org.apache.hadoop.tools.rumen.Node"/> <param name="b" type="org.apache.hadoop.tools.rumen.Node"/> </method> <method name="buildRackNodeMap" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not 
deprecated"> </method> <method name="getRackByName" return="org.apache.hadoop.tools.rumen.RackNode" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <method name="getMaximumDistance" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="parseTopologyTree" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="protected" deprecated="not deprecated"> </method> <field name="machineNodes" type="java.util.Set" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="rackNodes" type="java.util.Set" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="mNodesFlattened" type="org.apache.hadoop.tools.rumen.MachineNode[]" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="mNodeMap" type="java.util.Map" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="rNodeMap" type="java.util.Map" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <field name="maximumDistance" type="int" transient="false" volatile="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[{@link AbstractClusterStory} provides a partial implementation of {@link ClusterStory} by parsing the topology tree.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.AbstractClusterStory --> <!-- start class 
org.apache.hadoop.tools.rumen.CDFPiecewiseLinearRandomGenerator --> <class name="CDFPiecewiseLinearRandomGenerator" extends="org.apache.hadoop.tools.rumen.CDFRandomGenerator" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="CDFPiecewiseLinearRandomGenerator" type="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param cdf builds a CDFRandomValue engine around this {@link LoggedDiscreteCDF}, with a default-seeded RNG]]> </doc> </constructor> <constructor name="CDFPiecewiseLinearRandomGenerator" type="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF, long" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param cdf builds a CDFRandomValue engine around this {@link LoggedDiscreteCDF}, with an explicitly seeded RNG @param seed the random number generator seed]]> </doc> </constructor> <method name="valueAt" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="probability" type="double"/> <doc> <![CDATA[TODO This code assumes that the empirical minimum resp. maximum is the epistemological minimum resp. maximum. 
This is probably okay for the minimum, because that likely represents a task where everything went well, but for the maximum we may want to develop a way of extrapolating past the maximum.]]> </doc> </method> </class> <!-- end class org.apache.hadoop.tools.rumen.CDFPiecewiseLinearRandomGenerator --> <!-- start class org.apache.hadoop.tools.rumen.CDFRandomGenerator --> <class name="CDFRandomGenerator" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="initializeTables" abstract="false" native="false" synchronized="false" static="false" final="true" visibility="protected" deprecated="not deprecated"> <param name="cdf" type="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF"/> </method> <method name="floorIndex" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="probe" type="double"/> </method> <method name="getRankingAt" return="double" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="index" type="int"/> </method> <method name="getDatumAt" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="protected" deprecated="not deprecated"> <param name="index" type="int"/> </method> <method name="randomValue" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueAt" return="long" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="probability" type="double"/> </method> <doc> <![CDATA[An instance of this class generates random values that conform to the embedded {@link LoggedDiscreteCDF}. 
The discrete CDF is a pointwise approximation of the "real" CDF. We therefore have a choice of interpolation rules. A concrete subclass of this abstract class will implement valueAt(double) using a class-dependent interpolation rule.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.CDFRandomGenerator --> <!-- start interface org.apache.hadoop.tools.rumen.ClusterStory --> <interface name="ClusterStory" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="getMachines" return="java.util.Set" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get all machines of the cluster. @return A read-only set that contains all machines of the cluster.]]> </doc> </method> <method name="getRacks" return="java.util.Set" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get all racks of the cluster. @return A read-only set that contains all racks of the cluster.]]> </doc> </method> <method name="getClusterTopology" return="org.apache.hadoop.tools.rumen.Node" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the cluster topology tree. @return The root node of the cluster topology tree.]]> </doc> </method> <method name="getRandomMachines" return="org.apache.hadoop.tools.rumen.MachineNode[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="expected" type="int"/> <param name="random" type="java.util.Random"/> <doc> <![CDATA[Select a random set of machines. @param expected The expected sample size. @param random Random number generator to use. 
@return An array of up to expected number of {@link MachineNode}s.]]> </doc> </method> <method name="getMachineByName" return="org.apache.hadoop.tools.rumen.MachineNode" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Get {@link MachineNode} by its host name. @return The {@link MachineNode} with the same name. Or null if not found.]]> </doc> </method> <method name="getRackByName" return="org.apache.hadoop.tools.rumen.RackNode" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> <doc> <![CDATA[Get {@link RackNode} by its name. @return The {@link RackNode} with the same name. Or null if not found.]]> </doc> </method> <method name="distance" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="a" type="org.apache.hadoop.tools.rumen.Node"/> <param name="b" type="org.apache.hadoop.tools.rumen.Node"/> <doc> <![CDATA[Determine the distance between two {@link Node}s. Currently, the distance is loosely defined as the length of the longer path for either a or b to reach their common ancestor. @param a @param b @return The distance between {@link Node} a and {@link Node} b.]]> </doc> </method> <method name="getMaximumDistance" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the maximum distance possible between any two nodes. 
@return the maximum distance possible between any two nodes.]]> </doc> </method> <doc> <![CDATA[{@link ClusterStory} represents all configurations of a MapReduce cluster, including nodes, network topology, and slot configurations.]]> </doc> </interface> <!-- end interface org.apache.hadoop.tools.rumen.ClusterStory --> <!-- start class org.apache.hadoop.tools.rumen.ClusterTopologyReader --> <class name="ClusterTopologyReader" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ClusterTopologyReader" type="org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor. @param path Path to the JSON-encoded topology file, possibly compressed. @param conf @throws IOException]]> </doc> </constructor> <constructor name="ClusterTopologyReader" type="java.io.InputStream" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor. @param input The input stream for the JSON-encoded topology data.]]> </doc> </constructor> <method name="get" return="org.apache.hadoop.tools.rumen.LoggedNetworkTopology" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link LoggedNetworkTopology} object. 
@return The {@link LoggedNetworkTopology} object parsed from the input.]]> </doc> </method> <doc> <![CDATA[Reading JSON-encoded cluster topology and produce the parsed {@link LoggedNetworkTopology} object.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.ClusterTopologyReader --> <!-- start class org.apache.hadoop.tools.rumen.CurrentJHParser --> <class name="CurrentJHParser" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.JobHistoryParser"/> <constructor name="CurrentJHParser" type="java.io.InputStream" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="canParse" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="input" type="java.io.InputStream"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Can this parser parse the input? @param input @return Whether this parser can parse the input. 
@throws IOException]]> </doc> </method> <method name="nextEvent" return="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[{@link JobHistoryParser} that parses {@link JobHistory} files produced by {@link org.apache.hadoop.mapreduce.jobhistory.JobHistory} in the same source code tree as rumen.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.CurrentJHParser --> <!-- start interface org.apache.hadoop.tools.rumen.DeepCompare --> <interface name="DeepCompare" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="other" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="myLocation" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> <doc> <![CDATA[@param other the other comparand that's being compared to me @param myLocation the path that got to me. In the root, myLocation is null. To process the scalar {@code foo} field of the root we will make a recursive call with a {@link TreePath} whose {@code fieldName} is {@code "foo"} and whose {@code index} is -1 and whose {@code parent} is {@code null}. 
To process the plural {@code bar} field of the root we will make a recursive call with a {@link TreePath} whose {@code fieldName} is {@code "bar"} and whose {@code index} is -1 and whose {@code parent} is also {@code null}. @throws DeepInequalityException]]> </doc> </method> <doc> <![CDATA[Classes that implement this interface can deep-compare [for equality only, not order] with another instance. They do a deep compare. If there is any semantically significant difference, an implementer throws an exception with a chain of causes describing the chain of field references and indices that get you to the miscompared point.]]> </doc> </interface> <!-- end interface org.apache.hadoop.tools.rumen.DeepCompare --> <!-- start class org.apache.hadoop.tools.rumen.DeepInequalityException --> <class name="DeepInequalityException" extends="java.lang.Exception" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="DeepInequalityException" type="java.lang.String, org.apache.hadoop.tools.rumen.TreePath, java.lang.Throwable" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param message an exception message @param path the path that gets from the root to the inequality This is the constructor that I intend to have used for this exception.]]> </doc> </constructor> <constructor name="DeepInequalityException" type="java.lang.String, org.apache.hadoop.tools.rumen.TreePath" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param message an exception message @param path the path that gets from the root to the inequality This is the constructor that I intend to have used for this exception.]]> </doc> </constructor> <doc> <![CDATA[We use this exception class in the unit test, and we do a deep comparison when we run the]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.DeepInequalityException --> <!-- start class 
org.apache.hadoop.tools.rumen.DefaultInputDemuxer --> <class name="DefaultInputDemuxer" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.InputDemuxer"/> <constructor name="DefaultInputDemuxer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="bindTo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="getNext" return="org.apache.hadoop.tools.rumen.Pair" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[{@link DefaultInputDemuxer} acts as a pass-through demuxer. It just opens each file and returns back the input stream. 
If the input is compressed, it would return a decompression stream.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.DefaultInputDemuxer --> <!-- start class org.apache.hadoop.tools.rumen.DefaultOutputter --> <class name="DefaultOutputter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.Outputter"/> <constructor name="DefaultOutputter" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="init" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="output" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="object" type="T"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[The default {@link Outputter} that outputs to a plain file. 
Compression will be applied if the path has the right suffix.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.DefaultOutputter --> <!-- start class org.apache.hadoop.tools.rumen.DeskewedJobTraceReader --> <class name="DeskewedJobTraceReader" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.io.Closeable"/> <constructor name="DeskewedJobTraceReader" type="org.apache.hadoop.tools.rumen.JobTraceReader, int, boolean" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor. @param reader the {@link JobTraceReader} that's being protected @param skewBufferLength the number of late jobs that can precede a later out-of-order earlier job @throws IOException]]> </doc> </constructor> <constructor name="DeskewedJobTraceReader" type="org.apache.hadoop.tools.rumen.JobTraceReader" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> </class> <!-- end class org.apache.hadoop.tools.rumen.DeskewedJobTraceReader --> <!-- start class org.apache.hadoop.tools.rumen.Folder --> <class name="Folder" extends="org.apache.hadoop.conf.Configured" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="Folder" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> 
<param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <doc> <![CDATA[@param args]]> </doc> </method> </class> <!-- end class org.apache.hadoop.tools.rumen.Folder --> <!-- start class org.apache.hadoop.tools.rumen.Hadoop20JHParser --> <class name="Hadoop20JHParser" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.JobHistoryParser"/> <constructor name="Hadoop20JHParser" type="java.io.InputStream" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="canParse" return="boolean" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="input" type="java.io.InputStream"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Can this parser parse the input? @param input @return Whether this parser can parse the input. 
@throws IOException We will deem a stream to be a good 0.20 job history stream if the first line is exactly "Meta VERSION=\"1\" ."]]> </doc> </method> <method name="nextEvent" return="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[{@link JobHistoryParser} to parse job histories for hadoop 0.20 (META=1).]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.Hadoop20JHParser --> <!-- start class org.apache.hadoop.tools.rumen.HadoopLogsAnalyzer --> <class name="HadoopLogsAnalyzer" extends="org.apache.hadoop.conf.Configured" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="HadoopLogsAnalyzer" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <doc> <![CDATA[@param args Last arg is the input file. That file can be a directory, in which case you get all the files in sorted order. We will decompress files whose names end in .gz . switches: -c collect line types. 
-d debug mode -delays print out the delays [interval between job submit time and launch time] -runtimes print out the job runtimes -spreads print out the ratio of 10%ile and 90%ile, of both the successful map task attempt run times and the successful reduce task attempt run times -tasktimes prints out individual task time distributions collects all the line types and prints the first example of each one]]> </doc> </method> <doc> <![CDATA[This is the main class for rumen log mining functionality. It reads a directory of job tracker logs, and computes various information about it. See {@code usage()}, below.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.HadoopLogsAnalyzer --> <!-- start interface org.apache.hadoop.tools.rumen.InputDemuxer --> <interface name="InputDemuxer" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.io.Closeable"/> <method name="bindTo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Bind the {@link InputDemuxer} to a particular file. @param path The path to the file it should bind to. @param conf Configuration @throws IOException Returns true when the binding succeeds. If the file can be read but is in the wrong format, returns false. IOException is reserved for read errors.]]> </doc> </method> <method name="getNext" return="org.apache.hadoop.tools.rumen.Pair" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the next <name, input> pair. The name should preserve the original job history file or job conf file name. 
The input object should be closed before calling getNext() again. The old input object would be invalid after calling getNext() again. @return the next <name, input> pair.]]> </doc> </method> <doc> <![CDATA[{@link InputDemuxer} demultiplexes the input files into individual input streams.]]> </doc> </interface> <!-- end interface org.apache.hadoop.tools.rumen.InputDemuxer --> <!-- start class org.apache.hadoop.tools.rumen.Job20LineHistoryEventEmitter --> <class name="Job20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.HistoryEventEmitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </class> <!-- end class org.apache.hadoop.tools.rumen.Job20LineHistoryEventEmitter --> <!-- start class org.apache.hadoop.tools.rumen.JobBuilder --> <class name="JobBuilder" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="JobBuilder" type="java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getJobID" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="process" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="event" type="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent"/> <doc> <![CDATA[Process one {@link HistoryEvent} @param event The {@link HistoryEvent} to be processed.]]> </doc> </method> <method name="process" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="java.util.Properties"/> <doc> <![CDATA[Process a collection of JobConf {@link Properties}. We do not restrict it to be called once. 
It is okay to process a conf before, during or after the events. @param conf The job conf properties to be added.]]> </doc> </method> <method name="build" return="org.apache.hadoop.tools.rumen.LoggedJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Request the builder to build the final object. Once called, the {@link JobBuilder} would accept no more events or job-conf properties. @return Parsed {@link LoggedJob} object.]]> </doc> </method> <doc> <![CDATA[{@link JobBuilder} builds one job. It processes a sequence of {@link HistoryEvent}s.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.JobBuilder --> <!-- start class org.apache.hadoop.tools.rumen.JobConfigurationParser --> <class name="JobConfigurationParser" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="JobConfigurationParser" type="java.util.List" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor @param interested properties we should extract from the job configuration xml.]]> </doc> </constructor> <doc> <![CDATA[{@link JobConfigurationParser} parses the job configuration xml file, and extracts various framework specific properties. It parses the file using a stream-parser and thus is more memory efficient. 
[This optimization may be postponed for a future release]]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.JobConfigurationParser --> <!-- start class org.apache.hadoop.tools.rumen.JobConfPropertyNames --> <class name="JobConfPropertyNames" extends="java.lang.Enum" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.tools.rumen.JobConfPropertyNames[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.tools.rumen.JobConfPropertyNames" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <method name="getCandidates" return="java.lang.String[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> </class> <!-- end class org.apache.hadoop.tools.rumen.JobConfPropertyNames --> <!-- start interface org.apache.hadoop.tools.rumen.JobHistoryParser --> <interface name="JobHistoryParser" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.io.Closeable"/> <method name="nextEvent" return="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the next {@link HistoryEvent} @return the next {@link HistoryEvent}. If no more events left, return null. 
@throws IOException]]> </doc> </method> <doc> <![CDATA[{@link JobHistoryParser} defines the interface of a Job History file parser.]]> </doc> </interface> <!-- end interface org.apache.hadoop.tools.rumen.JobHistoryParser --> <!-- start class org.apache.hadoop.tools.rumen.JobHistoryParserFactory --> <class name="JobHistoryParserFactory" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="JobHistoryParserFactory" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getParser" return="org.apache.hadoop.tools.rumen.JobHistoryParser" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="ris" type="org.apache.hadoop.tools.rumen.RewindableInputStream"/> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[{@link JobHistoryParserFactory} is a singleton class that attempts to determine the version of job history and return a proper parser.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.JobHistoryParserFactory --> <!-- start class org.apache.hadoop.tools.rumen.JobHistoryParserFactory.VersionDetector --> <class name="JobHistoryParserFactory.VersionDetector" extends="java.lang.Enum" abstract="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.tools.rumen.JobHistoryParserFactory.VersionDetector[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.tools.rumen.JobHistoryParserFactory.VersionDetector" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> 
</class> <!-- end class org.apache.hadoop.tools.rumen.JobHistoryParserFactory.VersionDetector --> <!-- start interface org.apache.hadoop.tools.rumen.JobStory --> <interface name="JobStory" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link JobConf} for the job. @return the <code>JobConf</code> for the job]]> </doc> </method> <method name="getName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the job name. @return the job name]]> </doc> </method> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the job ID @return the job ID]]> </doc> </method> <method name="getUser" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the user who ran the job. @return the user who ran the job]]> </doc> </method> <method name="getSubmissionTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the job submission time. @return the job submission time]]> </doc> </method> <method name="getNumberMaps" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of maps in the {@link JobStory}. 
@return the number of maps in the <code>Job</code>]]> </doc> </method> <method name="getNumberReduces" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of reduce in the {@link JobStory}. @return the number of reduces in the <code>Job</code>]]> </doc> </method> <method name="getInputSplits" return="org.apache.hadoop.mapreduce.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the input splits for the job. @return the input splits for the job]]> </doc> </method> <method name="getTaskInfo" return="org.apache.hadoop.tools.rumen.TaskInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/> <param name="taskNumber" type="int"/> <doc> <![CDATA[Get {@link TaskInfo} for a given task. @param taskType {@link TaskType} of the task @param taskNumber Partition number of the task @return the <code>TaskInfo</code> for the given task]]> </doc> </method> <method name="getTaskAttemptInfo" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/> <param name="taskNumber" type="int"/> <param name="taskAttemptNumber" type="int"/> <doc> <![CDATA[Get {@link TaskAttemptInfo} for a given task-attempt, without regard to impact of locality (e.g. not needed to make scheduling decisions). 
@param taskType {@link TaskType} of the task-attempt @param taskNumber Partition number of the task-attempt @param taskAttemptNumber Attempt number of the task @return the <code>TaskAttemptInfo</code> for the given task-attempt]]> </doc> </method> <method name="getMapTaskAttemptInfoAdjusted" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskNumber" type="int"/> <param name="taskAttemptNumber" type="int"/> <param name="locality" type="int"/> <doc> <![CDATA[Get {@link TaskAttemptInfo} for a given task-attempt, considering impact of locality. @param taskNumber Partition number of the task-attempt @param taskAttemptNumber Attempt number of the task @param locality Data locality of the task as scheduled in simulation @return the <code>TaskAttemptInfo</code> for the given task-attempt]]> </doc> </method> <method name="getOutcome" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the outcome of the job execution. @return The outcome of the job execution.]]> </doc> </method> <method name="getQueueName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the queue where the job is submitted. 
@return the queue where the job is submitted.]]> </doc> </method> <doc> <![CDATA[{@link JobStory} represents the runtime information available for a completed Map-Reduce job.]]> </doc> </interface> <!-- end interface org.apache.hadoop.tools.rumen.JobStory --> <!-- start interface org.apache.hadoop.tools.rumen.JobStoryProducer --> <interface name="JobStoryProducer" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.io.Closeable"/> <method name="getNextJob" return="org.apache.hadoop.tools.rumen.JobStory" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Get the next job. @return The next job. Or null if no more job is available. @throws IOException]]> </doc> </method> <doc> <![CDATA[{@link JobStoryProducer} produces the sequence of {@link JobStory}'s.]]> </doc> </interface> <!-- end interface org.apache.hadoop.tools.rumen.JobStoryProducer --> <!-- start class org.apache.hadoop.tools.rumen.JobTraceReader --> <class name="JobTraceReader" extends="org.apache.hadoop.tools.rumen.JsonObjectMapperParser" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="JobTraceReader" type="org.apache.hadoop.fs.Path, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor. @param path Path to the JSON trace file, possibly compressed. @param conf @throws IOException]]> </doc> </constructor> <constructor name="JobTraceReader" type="java.io.InputStream" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor. 
@param input The input stream for the JSON trace.]]> </doc> </constructor> <doc> <![CDATA[Reading JSON-encoded job traces and produce {@link LoggedJob} instances.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.JobTraceReader --> <!-- start class org.apache.hadoop.tools.rumen.JsonObjectMapperWriter --> <class name="JsonObjectMapperWriter" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.io.Closeable"/> <constructor name="JsonObjectMapperWriter" type="java.io.OutputStream, boolean" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </constructor> <method name="write" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="object" type="T"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Simple wrapper around {@link JsonGenerator} to write objects in JSON format. 
@param <T> The type of the objects to be written.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.JsonObjectMapperWriter --> <!-- start class org.apache.hadoop.tools.rumen.LoggedDiscreteCDF --> <class name="LoggedDiscreteCDF" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.DeepCompare"/> <constructor name="LoggedDiscreteCDF" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getMinimum" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getRankings" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMaximum" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNumberValues" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> </method> <doc> <![CDATA[A {@link LoggedDiscreteCDF} is a discrete approximation of a cumulative distribution function, with this class set up to meet the requirements of the Jackson JSON parser/generator. 
All of the public methods are simply accessors for the instance variables we want to write out in the JSON files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedDiscreteCDF --> <!-- start class org.apache.hadoop.tools.rumen.LoggedJob --> <class name="LoggedJob" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.DeepCompare"/> <method name="setUnknownAttribute" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="attributeName" type="java.lang.String"/> <param name="ignored" type="java.lang.Object"/> </method> <method name="getUser" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobID" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPriority" return="org.apache.hadoop.tools.rumen.LoggedJob.JobPriority" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getComputonsPerMapInputByte" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getComputonsPerMapOutputByte" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getComputonsPerReduceInputByte" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method 
name="getComputonsPerReduceOutputByte" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getSubmitTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getLaunchTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFinishTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getHeapMegabytes" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTotalMaps" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTotalReduces" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutcome" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobtype" return="org.apache.hadoop.tools.rumen.LoggedJob.JobType" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getDirectDependantJobs" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapTasks" 
return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getReduceTasks" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOtherTasks" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getSuccessfulMapAttemptCDFs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFailedMapAttemptCDFs" return="java.util.ArrayList" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getSuccessfulReduceAttemptCDF" return="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFailedReduceAttemptCDF" return="org.apache.hadoop.tools.rumen.LoggedDiscreteCDF" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapperTriesToSucceed" return="double[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFailedMapperFraction" return="double" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getRelativeTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" 
deprecated="not deprecated"> </method> <method name="getQueue" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getClusterMapMB" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getClusterReduceMB" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobMapMB" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobReduceMB" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> </method> <doc> <![CDATA[A {@link LoggedJob} is a representation of a Hadoop job, with the details of this class set up to meet the requirements of the Jackson JSON parser/generator. 
All of the public methods are simply accessors for the instance variables we want to write out in the JSON files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedJob --> <!-- start class org.apache.hadoop.tools.rumen.LoggedJob.JobPriority --> <class name="LoggedJob.JobPriority" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.tools.rumen.LoggedJob.JobPriority[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.tools.rumen.LoggedJob.JobPriority" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedJob.JobPriority --> <!-- start class org.apache.hadoop.tools.rumen.LoggedJob.JobType --> <class name="LoggedJob.JobType" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.tools.rumen.LoggedJob.JobType[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.tools.rumen.LoggedJob.JobType" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedJob.JobType --> <!-- start class org.apache.hadoop.tools.rumen.LoggedLocation --> <class name="LoggedLocation" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> 
<implements name="org.apache.hadoop.tools.rumen.DeepCompare"/> <constructor name="LoggedLocation" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getLayers" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="setUnknownAttribute" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="attributeName" type="java.lang.String"/> <param name="ignored" type="java.lang.Object"/> </method> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> </method> <doc> <![CDATA[A {@link LoggedLocation} is a representation of a point in a hierarchical network, represented as a series of membership names, broadest first. For example, if your network has <i>hosts</i> grouped into <i>racks</i>, then in one cluster you might have a node {@code node1} on rack {@code rack1}. This would be represented with an ArrayList of two layers, with two {@link String} s being {@code "rack1"} and {@code "node1"}. The details of this class are set up to meet the requirements of the Jackson JSON parser/generator. 
All of the public methods are simply accessors for the instance variables we want to write out in the JSON files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedLocation --> <!-- start class org.apache.hadoop.tools.rumen.LoggedNetworkTopology --> <class name="LoggedNetworkTopology" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.DeepCompare"/> <constructor name="LoggedNetworkTopology" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setUnknownAttribute" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="attributeName" type="java.lang.String"/> <param name="ignored" type="java.lang.Object"/> </method> <method name="getName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getChildren" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> </method> <doc> <![CDATA[A {@link LoggedNetworkTopology} represents a tree that in turn represents a hierarchy of hosts. The current version requires the tree to have all leaves at the same level. 
All of the public methods are simply accessors for the instance variables we want to write out in the JSON files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedNetworkTopology --> <!-- start class org.apache.hadoop.tools.rumen.LoggedSingleRelativeRanking --> <class name="LoggedSingleRelativeRanking" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.DeepCompare"/> <constructor name="LoggedSingleRelativeRanking" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="setUnknownAttribute" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="attributeName" type="java.lang.String"/> <param name="ignored" type="java.lang.Object"/> </method> <method name="getRelativeRanking" return="double" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getDatum" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> </method> <doc> <![CDATA[A {@link LoggedSingleRelativeRanking} represents an X-Y coordinate of a single point in a discrete CDF. 
All of the public methods are simply accessors for the instance variables we want to write out in the JSON files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedSingleRelativeRanking --> <!-- start class org.apache.hadoop.tools.rumen.LoggedTask --> <class name="LoggedTask" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.DeepCompare"/> <method name="setUnknownAttribute" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="attributeName" type="java.lang.String"/> <param name="ignored" type="java.lang.Object"/> </method> <method name="getInputBytes" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getInputRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutputBytes" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutputRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTaskID" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getStartTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFinishTime" return="long" abstract="false" native="false" synchronized="false" static="false" 
final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getAttempts" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getPreferredLocations" return="java.util.List" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTaskStatus" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getTaskType" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="incorporateCounters" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counters" type="org.apache.hadoop.mapreduce.jobhistory.JhCounters"/> </method> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> </method> <doc> <![CDATA[A {@link LoggedTask} represents a [hadoop] task that is part of a hadoop job. It knows about the [possibly empty] sequence of attempts, its I/O footprint, and its runtime. 
All of the public methods are simply accessors for the instance variables we want to write out in the JSON files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedTask --> <!-- start class org.apache.hadoop.tools.rumen.LoggedTaskAttempt --> <class name="LoggedTaskAttempt" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.DeepCompare"/> <method name="setUnknownAttribute" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="attributeName" type="java.lang.String"/> <param name="ignored" type="java.lang.Object"/> </method> <method name="getShuffleFinished" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getSortFinished" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getAttemptID" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getResult" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getStartTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFinishTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getHostName" return="java.lang.String" 
abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getHdfsBytesRead" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getHdfsBytesWritten" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFileBytesRead" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getFileBytesWritten" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapInputRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapOutputBytes" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapOutputRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getCombineInputRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getReduceInputGroups" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getReduceInputRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" 
deprecated="not deprecated"> </method> <method name="getReduceShuffleBytes" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getReduceOutputRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getSpilledRecords" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getLocation" return="org.apache.hadoop.tools.rumen.LoggedLocation" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapInputBytes" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="incorporateCounters" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="counters" type="org.apache.hadoop.mapreduce.jobhistory.JhCounters"/> </method> <method name="deepCompare" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="comparand" type="org.apache.hadoop.tools.rumen.DeepCompare"/> <param name="loc" type="org.apache.hadoop.tools.rumen.TreePath"/> <exception name="DeepInequalityException" type="org.apache.hadoop.tools.rumen.DeepInequalityException"/> </method> <doc> <![CDATA[A {@link LoggedTaskAttempt} represents an attempt to run an hadoop task in a hadoop job. Note that a task can have several attempts. 
All of the public methods are simply accessors for the instance variables we want to write out in the JSON files.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.LoggedTaskAttempt --> <!-- start class org.apache.hadoop.tools.rumen.MachineNode --> <class name="MachineNode" extends="org.apache.hadoop.tools.rumen.Node" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="obj" type="java.lang.Object"/> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMemory" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the available physical RAM of the node. @return The available physical RAM of the node, in KB.]]> </doc> </method> <method name="getMapSlots" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of map slots of the node. @return The number of map slots of the node.]]> </doc> </method> <method name="getReduceSlots" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of reduce slots of the node. @return The number of reduce slots of the node.]]> </doc> </method> <method name="getMemoryPerMapSlot" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the amount of RAM reserved for each map slot. 
@return the amount of RAM reserved for each map slot, in KB.]]> </doc> </method> <method name="getMemoryPerReduceSlot" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the amount of RAM reserved for each reduce slot. @return the amount of RAM reserved for each reduce slot, in KB.]]> </doc> </method> <method name="getNumCores" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the number of cores of the node. @return the number of cores of the node.]]> </doc> </method> <method name="getRackNode" return="org.apache.hadoop.tools.rumen.RackNode" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the rack node that the machine belongs to. @return The rack node that the machine belongs to. Returns null if the machine does not belong to any rack.]]> </doc> </method> <method name="addChild" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="child" type="org.apache.hadoop.tools.rumen.Node"/> </method> <doc> <![CDATA[{@link MachineNode} represents the configuration of a cluster node. 
{@link MachineNode} should be constructed by {@link MachineNode.Builder}.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.MachineNode --> <!-- start class org.apache.hadoop.tools.rumen.MachineNode.Builder --> <class name="MachineNode.Builder" extends="java.lang.Object" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <constructor name="MachineNode.Builder" type="java.lang.String, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Start building a new NodeInfo object. @param name Unique name of the node. Typically the fully qualified domain name.]]> </doc> </constructor> <method name="setMemory" return="org.apache.hadoop.tools.rumen.MachineNode.Builder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="memory" type="long"/> <doc> <![CDATA[Set the physical memory of the node. @param memory Available RAM in KB.]]> </doc> </method> <method name="setMapSlots" return="org.apache.hadoop.tools.rumen.MachineNode.Builder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="mapSlots" type="int"/> <doc> <![CDATA[Set the number of map slots for the node. @param mapSlots The number of map slots for the node.]]> </doc> </method> <method name="setReduceSlots" return="org.apache.hadoop.tools.rumen.MachineNode.Builder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="reduceSlots" type="int"/> <doc> <![CDATA[Set the number of reduce slots for the node. 
@param reduceSlots The number of reduce slots for the node.]]> </doc> </method> <method name="setMemoryPerMapSlot" return="org.apache.hadoop.tools.rumen.MachineNode.Builder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="memoryPerMapSlot" type="long"/> <doc> <![CDATA[Set the amount of RAM reserved for each map slot. @param memoryPerMapSlot The amount of RAM reserved for each map slot, in KB.]]> </doc> </method> <method name="setMemoryPerReduceSlot" return="org.apache.hadoop.tools.rumen.MachineNode.Builder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="memoryPerReduceSlot" type="long"/> <doc> <![CDATA[Set the amount of RAM reserved for each reduce slot. @param memoryPerReduceSlot The amount of RAM reserved for each reduce slot, in KB.]]> </doc> </method> <method name="setNumCores" return="org.apache.hadoop.tools.rumen.MachineNode.Builder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="numCores" type="int"/> <doc> <![CDATA[Set the number of cores for the node. @param numCores Number of cores for the node.]]> </doc> </method> <method name="cloneFrom" return="org.apache.hadoop.tools.rumen.MachineNode.Builder" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="ref" type="org.apache.hadoop.tools.rumen.MachineNode"/> <doc> <![CDATA[Clone the settings from a reference {@link MachineNode} object. 
@param ref The reference {@link MachineNode} object.]]> </doc> </method> <method name="build" return="org.apache.hadoop.tools.rumen.MachineNode" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Build the {@link MachineNode} object. @return The {@link MachineNode} object being built.]]> </doc> </method> <doc> <![CDATA[Builder for a NodeInfo object]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.MachineNode.Builder --> <!-- start class org.apache.hadoop.tools.rumen.MapAttempt20LineHistoryEventEmitter --> <class name="MapAttempt20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MapAttempt20LineHistoryEventEmitter" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> </class> <!-- end class org.apache.hadoop.tools.rumen.MapAttempt20LineHistoryEventEmitter --> <!-- start class org.apache.hadoop.tools.rumen.MapTaskAttemptInfo --> <class name="MapTaskAttemptInfo" extends="org.apache.hadoop.tools.rumen.TaskAttemptInfo" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="MapTaskAttemptInfo" type="org.apache.hadoop.mapred.TaskStatus.State, org.apache.hadoop.tools.rumen.TaskInfo, long" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getRuntime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getMapRuntime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the runtime for the <b>map</b> phase of the map-task attempt. 
@return the runtime for the <b>map</b> phase of the map-task attempt]]> </doc> </method> <doc> <![CDATA[{@link MapTaskAttemptInfo} represents the information with regard to a map task attempt.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.MapTaskAttemptInfo --> <!-- start class org.apache.hadoop.tools.rumen.Node --> <class name="Node" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.lang.Comparable"/> <constructor name="Node" type="java.lang.String, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@param name A unique name to identify a node in the cluster. @param level The level of the node in the cluster]]> </doc> </constructor> <method name="getName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the name of the node. @return The name of the node.]]> </doc> </method> <method name="getLevel" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the level of the node. @return The level of the node.]]> </doc> </method> <method name="addChild" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="child" type="org.apache.hadoop.tools.rumen.Node"/> <doc> <![CDATA[Add a child node to this node. @param child The child node to be added. The child node should not currently belong to another cluster topology. 
@return Boolean indicating whether the node is successfully added.]]> </doc> </method> <method name="hasChildren" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Does this node have any children? @return Boolean indicating whether this node has any children.]]> </doc> </method> <method name="getChildren" return="java.util.Set" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the children of this node. @return The children of this node. If no child, an empty set will be returned. The returned set is read-only.]]> </doc> </method> <method name="getParent" return="org.apache.hadoop.tools.rumen.Node" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the parent node. @return the parent node. If root node, return null.]]> </doc> </method> <method name="hashCode" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="equals" return="boolean" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="obj" type="java.lang.Object"/> </method> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="compareTo" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="o" type="org.apache.hadoop.tools.rumen.Node"/> </method> <doc> <![CDATA[{@link Node} represents a node in the cluster topology. 
A node can be a {@link MachineNode}, or a {@link RackNode}, etc.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.Node --> <!-- start interface org.apache.hadoop.tools.rumen.Outputter --> <interface name="Outputter" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="java.io.Closeable"/> <method name="init" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="path" type="org.apache.hadoop.fs.Path"/> <param name="conf" type="org.apache.hadoop.conf.Configuration"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Initialize the {@link Outputter} to a specific path. @param path The {@link Path} to the output file. @param conf Configuration @throws IOException]]> </doc> </method> <method name="output" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="object" type="T"/> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Output an object. @param object The object. 
@throws IOException]]> </doc> </method> <doc> <![CDATA[Interface to output a sequence of objects of type T.]]> </doc> </interface> <!-- end interface org.apache.hadoop.tools.rumen.Outputter --> <!-- start class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants --> <class name="Pre21JobHistoryConstants" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Pre21JobHistoryConstants" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> </class> <!-- end class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants --> <!-- start class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values --> <class name="Pre21JobHistoryConstants.Values" extends="java.lang.Enum" abstract="false" static="true" final="true" visibility="public" deprecated="not deprecated"> <method name="values" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values[]" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="valueOf" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="name" type="java.lang.String"/> </method> <doc> <![CDATA[This enum contains some of the values commonly used by history log events. 
since values in history can only be strings - Values.name() is used in most places in history file.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values --> <!-- start class org.apache.hadoop.tools.rumen.RackNode --> <class name="RackNode" extends="org.apache.hadoop.tools.rumen.Node" abstract="false" static="false" final="true" visibility="public" deprecated="not deprecated"> <constructor name="RackNode" type="java.lang.String, int" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="addChild" return="boolean" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="child" type="org.apache.hadoop.tools.rumen.Node"/> </method> <method name="getMachinesInRack" return="java.util.Set" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the machine nodes that belong to the rack. @return The machine nodes that belong to the rack.]]> </doc> </method> <doc> <![CDATA[{@link RackNode} represents a rack node in the cluster topology.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.RackNode --> <!-- start class org.apache.hadoop.tools.rumen.RandomSeedGenerator --> <class name="RandomSeedGenerator" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="RandomSeedGenerator" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getSeed" return="long" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="streamId" type="java.lang.String"/> <param name="masterSeed" type="long"/> <doc> <![CDATA[Generates a new random seed. 
@param streamId a string identifying the stream of random numbers @param masterSeed higher level master random seed @return the random seed. Different (streamId, masterSeed) pairs result in (vastly) different random seeds.]]> </doc> </method> <doc> <![CDATA[The purpose of this class is to generate new random seeds from a master seed. This is needed to make the Random().next*() calls in rumen and mumak deterministic so that mumak simulations become deterministically replayable. In these tools we need many independent streams of random numbers, some of which are created dynamically. We seed these streams with the sub-seeds returned by RandomSeedGenerator. For a slightly more complicated approach to generating multiple streams of random numbers with better theoretical guarantees, see P. L'Ecuyer, R. Simard, E. J. Chen, and W. D. Kelton, ``An Object-Oriented Random-Number Package with Many Long Streams and Substreams'', Operations Research, 50, 6 (2002), 1073--1075 http://www.iro.umontreal.ca/~lecuyer/papers.html http://www.iro.umontreal.ca/~lecuyer/myftp/streams00/]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.RandomSeedGenerator --> <!-- start class org.apache.hadoop.tools.rumen.ReduceAttempt20LineHistoryEventEmitter --> <class name="ReduceAttempt20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </class> <!-- end class org.apache.hadoop.tools.rumen.ReduceAttempt20LineHistoryEventEmitter --> <!-- start class org.apache.hadoop.tools.rumen.ReduceTaskAttemptInfo --> <class name="ReduceTaskAttemptInfo" extends="org.apache.hadoop.tools.rumen.TaskAttemptInfo" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ReduceTaskAttemptInfo" type="org.apache.hadoop.mapred.TaskStatus.State, org.apache.hadoop.tools.rumen.TaskInfo, long, long, long" static="false" 
final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getReduceRuntime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the runtime for the <b>reduce</b> phase of the reduce task-attempt. @return the runtime for the <b>reduce</b> phase of the reduce task-attempt]]> </doc> </method> <method name="getShuffleRuntime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the runtime for the <b>shuffle</b> phase of the reduce task-attempt. @return the runtime for the <b>shuffle</b> phase of the reduce task-attempt]]> </doc> </method> <method name="getMergeRuntime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the runtime for the <b>merge</b> phase of the reduce task-attempt @return the runtime for the <b>merge</b> phase of the reduce task-attempt]]> </doc> </method> <method name="getRuntime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[{@link ReduceTaskAttemptInfo} represents the information with regard to a reduce task attempt.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.ReduceTaskAttemptInfo --> <!-- start class org.apache.hadoop.tools.rumen.RewindableInputStream --> <class name="RewindableInputStream" extends="java.io.InputStream" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="RewindableInputStream" type="java.io.InputStream" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor. 
@param input]]> </doc> </constructor> <constructor name="RewindableInputStream" type="java.io.InputStream, int" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Constructor @param input input stream. @param maxBytesToRemember Maximum number of bytes we need to remember at the beginning of the stream. If {@link #rewind()} is called after so many bytes are read from the stream, {@link #rewind()} would fail.]]> </doc> </constructor> <method name="read" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="read" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="buffer" type="byte[]"/> <param name="offset" type="int"/> <param name="length" type="int"/> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="rewind" return="java.io.InputStream" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[A simple wrapper class to make any input stream "rewindable". 
It could be made more memory efficient by growing the internal buffer adaptively.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.RewindableInputStream --> <!-- start class org.apache.hadoop.tools.rumen.Task20LineHistoryEventEmitter --> <class name="Task20LineHistoryEventEmitter" extends="org.apache.hadoop.tools.rumen.HistoryEventEmitter" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="Task20LineHistoryEventEmitter" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> </class> <!-- end class org.apache.hadoop.tools.rumen.Task20LineHistoryEventEmitter --> <!-- start class org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter --> <class name="TaskAttempt20LineEventEmitter" extends="org.apache.hadoop.tools.rumen.HistoryEventEmitter" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskAttempt20LineEventEmitter" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> </class> <!-- end class org.apache.hadoop.tools.rumen.TaskAttempt20LineEventEmitter --> <!-- start class org.apache.hadoop.tools.rumen.TaskAttemptInfo --> <class name="TaskAttemptInfo" extends="java.lang.Object" abstract="true" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskAttemptInfo" type="org.apache.hadoop.mapred.TaskStatus.State, org.apache.hadoop.tools.rumen.TaskInfo" static="false" final="false" visibility="protected" deprecated="not deprecated"> </constructor> <method name="getRunState" return="org.apache.hadoop.mapred.TaskStatus.State" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the final {@link State} of the task-attempt. 
@return the final <code>State</code> of the task-attempt]]> </doc> </method> <method name="getRuntime" return="long" abstract="true" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the total runtime for the task-attempt. @return the total runtime for the task-attempt]]> </doc> </method> <method name="getTaskInfo" return="org.apache.hadoop.tools.rumen.TaskInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the {@link TaskInfo} for the given task-attempt. @return the <code>TaskInfo</code> for the given task-attempt]]> </doc> </method> <field name="state" type="org.apache.hadoop.mapred.TaskStatus.State" transient="false" volatile="false" static="false" final="true" visibility="protected" deprecated="not deprecated"> </field> <field name="taskInfo" type="org.apache.hadoop.tools.rumen.TaskInfo" transient="false" volatile="false" static="false" final="true" visibility="protected" deprecated="not deprecated"> </field> <doc> <![CDATA[{@link TaskAttemptInfo} is a collection of statistics about a particular task-attempt gleaned from job-history of the job.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.TaskAttemptInfo --> <!-- start class org.apache.hadoop.tools.rumen.TaskInfo --> <class name="TaskInfo" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TaskInfo" type="long, int, long, int, long" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="getInputBytes" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Raw bytes read from the FileSystem into the task. 
Note that this may not always match the input bytes to the task.]]> </doc> </method> <method name="getInputRecords" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Number of records input to this task.]]> </doc> </method> <method name="getOutputBytes" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Raw bytes written to the destination FileSystem. Note that this may not match output bytes.]]> </doc> </method> <method name="getOutputRecords" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Number of records output from this task.]]> </doc> </method> <method name="getTaskMemory" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[@return Memory used by the task leq the heap size.]]> </doc> </method> </class> <!-- end class org.apache.hadoop.tools.rumen.TaskInfo --> <!-- start class org.apache.hadoop.tools.rumen.TopologyBuilder --> <class name="TopologyBuilder" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TopologyBuilder" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="process" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="event" type="org.apache.hadoop.mapreduce.jobhistory.HistoryEvent"/> <doc> <![CDATA[Process one {@link HistoryEvent} @param event The {@link HistoryEvent} to be processed.]]> </doc> </method> <method name="process" abstract="false" native="false" 
synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="conf" type="java.util.Properties"/> <doc> <![CDATA[Process a collection of JobConf {@link Properties}. We do not restrict it to be called once. @param conf The job conf properties to be added.]]> </doc> </method> <method name="build" return="org.apache.hadoop.tools.rumen.LoggedNetworkTopology" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Request the builder to build the final object. Once called, the {@link TopologyBuilder} would accept no more events or job-conf properties. @return Parsed {@link LoggedNetworkTopology} object.]]> </doc> </method> <doc> <![CDATA[Building the cluster topology.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.TopologyBuilder --> <!-- start class org.apache.hadoop.tools.rumen.TraceBuilder --> <class name="TraceBuilder" extends="org.apache.hadoop.conf.Configured" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.util.Tool"/> <constructor name="TraceBuilder" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="main" abstract="false" native="false" synchronized="false" static="true" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> </method> <method name="run" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="args" type="java.lang.String[]"/> <exception name="Exception" type="java.lang.Exception"/> </method> <doc> <![CDATA[The main driver of the Rumen Parser.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.TraceBuilder --> <!-- start class org.apache.hadoop.tools.rumen.TreePath --> <class 
name="TreePath" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="TreePath" type="org.apache.hadoop.tools.rumen.TreePath, java.lang.String" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <constructor name="TreePath" type="org.apache.hadoop.tools.rumen.TreePath, java.lang.String, int" static="false" final="false" visibility="public" deprecated="not deprecated"> </constructor> <method name="toString" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[This describes a path from a node to the root. We use it when we compare two trees during rumen unit tests. If the trees are not identical, this chain will be converted to a string which describes the path from the root to the fields that did not compare.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.TreePath --> <!-- start class org.apache.hadoop.tools.rumen.ZombieCluster --> <class name="ZombieCluster" extends="org.apache.hadoop.tools.rumen.AbstractClusterStory" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <constructor name="ZombieCluster" type="org.apache.hadoop.tools.rumen.LoggedNetworkTopology, org.apache.hadoop.tools.rumen.MachineNode" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Construct a homogeneous cluster. We assume that the leaves on the topology are {@link MachineNode}s, and the parents of {@link MachineNode}s are {@link RackNode}s. We also expect all leaf nodes are on the same level. @param topology The network topology. 
@param defaultNode The default node setting.]]> </doc> </constructor> <constructor name="ZombieCluster" type="org.apache.hadoop.fs.Path, org.apache.hadoop.tools.rumen.MachineNode, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Construct a homogeneous cluster. We assume that the leaves on the topology are {@link MachineNode}s, and the parents of {@link MachineNode}s are {@link RackNode}s. We also expect all leaf nodes are on the same level. @param path Path to the JSON-encoded topology file. @param conf @param defaultNode The default node setting. @throws IOException]]> </doc> </constructor> <constructor name="ZombieCluster" type="java.io.InputStream, org.apache.hadoop.tools.rumen.MachineNode" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Construct a homogeneous cluster. We assume that the leaves on the topology are {@link MachineNode}s, and the parents of {@link MachineNode}s are {@link RackNode}s. We also expect all leaf nodes are on the same level. @param input The input stream for the JSON-encoded topology file. @param defaultNode The default node setting. 
@throws IOException]]> </doc> </constructor> <method name="getClusterTopology" return="org.apache.hadoop.tools.rumen.Node" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <doc> <![CDATA[{@link ZombieCluster} rebuilds the cluster topology using the information obtained from job history logs.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.ZombieCluster --> <!-- start class org.apache.hadoop.tools.rumen.ZombieJob --> <class name="ZombieJob" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.JobStory"/> <constructor name="ZombieJob" type="org.apache.hadoop.tools.rumen.LoggedJob, org.apache.hadoop.tools.rumen.ClusterStory, long" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[This constructor creates a {@link ZombieJob} with the same semantics as the {@link LoggedJob} passed in this parameter @param job The dead job this ZombieJob instance is based on. @param cluster The cluster topology where the dead job ran on. This argument can be null if we do not have knowledge of the cluster topology. @param seed Seed for the random number generator for filling in information not available from the ZombieJob.]]> </doc> </constructor> <constructor name="ZombieJob" type="org.apache.hadoop.tools.rumen.LoggedJob, org.apache.hadoop.tools.rumen.ClusterStory" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[This constructor creates a {@link ZombieJob} with the same semantics as the {@link LoggedJob} passed in this parameter @param job The dead job this ZombieJob instance is based on. @param cluster The cluster topology where the dead job ran on. 
This argument can be null if we do not have knowledge of the cluster topology.]]> </doc> </constructor> <method name="getJobConf" return="org.apache.hadoop.mapred.JobConf" abstract="false" native="false" synchronized="true" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getInputSplits" return="org.apache.hadoop.mapreduce.InputSplit[]" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getJobID" return="org.apache.hadoop.mapreduce.JobID" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNumberMaps" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNumberReduces" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getOutcome" return="org.apache.hadoop.tools.rumen.Pre21JobHistoryConstants.Values" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getSubmissionTime" return="long" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getQueueName" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getNumLoggedMaps" return="int" abstract="false" native="false" 
synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Getting the number of map tasks that are actually logged in the trace. @return The number of map tasks that are actually logged in the trace.]]> </doc> </method> <method name="getNumLoggedReduces" return="int" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Getting the number of reduce tasks that are actually logged in the trace. @return The number of reduce tasks that are actually logged in the trace.]]> </doc> </method> <method name="getUser" return="java.lang.String" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> </method> <method name="getLoggedJob" return="org.apache.hadoop.tools.rumen.LoggedJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <doc> <![CDATA[Get the underlying {@link LoggedJob} object read directly from the trace. This is mainly for debugging. @return the underlying {@link LoggedJob} object]]> </doc> </method> <method name="getTaskAttemptInfo" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/> <param name="taskNumber" type="int"/> <param name="taskAttemptNumber" type="int"/> <doc> <![CDATA[Get a {@link TaskAttemptInfo} with a {@link TaskAttemptID} associated with taskType, taskNumber, and taskAttemptNumber. This function does not care about locality, and follows the following decision logic: 1. Make up a {@link TaskAttemptInfo} if the task attempt is missing in trace, 2. Make up a {@link TaskAttemptInfo} if the task attempt has a KILLED final status in trace, 3. 
Otherwise (final state is SUCCEEDED or FAILED), construct the {@link TaskAttemptInfo} from the trace.]]> </doc> </method> <method name="getTaskInfo" return="org.apache.hadoop.tools.rumen.TaskInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskType" type="org.apache.hadoop.mapreduce.TaskType"/> <param name="taskNumber" type="int"/> </method> <method name="getMapTaskAttemptInfoAdjusted" return="org.apache.hadoop.tools.rumen.TaskAttemptInfo" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <param name="taskNumber" type="int"/> <param name="taskAttemptNumber" type="int"/> <param name="locality" type="int"/> <doc> <![CDATA[Get a {@link TaskAttemptInfo} with a {@link TaskAttemptID} associated with taskType, taskNumber, and taskAttemptNumber. This function considers locality, and follows the following decision logic: 1. Make up a {@link TaskAttemptInfo} if the task attempt is missing in trace, 2. Make up a {@link TaskAttemptInfo} if the task attempt has a KILLED final status in trace, 3. If final state is FAILED, construct a {@link TaskAttemptInfo} from the trace, without considering locality. 4. If final state is SUCCEEDED, construct a {@link TaskAttemptInfo} from the trace, with runtime scaled according to locality in simulation and locality in trace.]]> </doc> </method> <doc> <![CDATA[{@link ZombieJob} is a layer above {@link LoggedJob} raw JSON objects. Each {@link ZombieJob} object represents a job in job history. For everything that exists in job history, contents are returned unchanged faithfully. 
To get input splits of a non-existent task, a non-existent task attempt, or an ill-formed task attempt, proper objects are made up from statistical sketches.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.ZombieJob --> <!-- start class org.apache.hadoop.tools.rumen.ZombieJobProducer --> <class name="ZombieJobProducer" extends="java.lang.Object" abstract="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <implements name="org.apache.hadoop.tools.rumen.JobStoryProducer"/> <constructor name="ZombieJobProducer" type="org.apache.hadoop.fs.Path, org.apache.hadoop.tools.rumen.ZombieCluster, org.apache.hadoop.conf.Configuration" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor @param path Path to the JSON trace file, possibly compressed. @param cluster The topology of the cluster that corresponds to the jobs in the trace. The argument can be null if we do not have knowledge of the cluster topology. @param conf @throws IOException]]> </doc> </constructor> <constructor name="ZombieJobProducer" type="org.apache.hadoop.fs.Path, org.apache.hadoop.tools.rumen.ZombieCluster, org.apache.hadoop.conf.Configuration, long" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor @param path Path to the JSON trace file, possibly compressed. @param cluster The topology of the cluster that corresponds to the jobs in the trace. The argument can be null if we do not have knowledge of the cluster topology. @param conf @param randomSeed use a deterministic seed. 
@throws IOException]]> </doc> </constructor> <constructor name="ZombieJobProducer" type="java.io.InputStream, org.apache.hadoop.tools.rumen.ZombieCluster" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor @param input The input stream for the JSON trace. @param cluster The topology of the cluster that corresponds to the jobs in the trace. The argument can be null if we do not have knowledge of the cluster topology. @throws IOException]]> </doc> </constructor> <constructor name="ZombieJobProducer" type="java.io.InputStream, org.apache.hadoop.tools.rumen.ZombieCluster, long" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> <doc> <![CDATA[Constructor @param input The input stream for the JSON trace. @param cluster The topology of the cluster that corresponds to the jobs in the trace. The argument can be null if we do not have knowledge of the cluster topology. @param randomSeed use a deterministic seed. @throws IOException]]> </doc> </constructor> <method name="getNextJob" return="org.apache.hadoop.tools.rumen.ZombieJob" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <method name="close" abstract="false" native="false" synchronized="false" static="false" final="false" visibility="public" deprecated="not deprecated"> <exception name="IOException" type="java.io.IOException"/> </method> <doc> <![CDATA[Producing {@link JobStory}s from job trace.]]> </doc> </class> <!-- end class org.apache.hadoop.tools.rumen.ZombieJobProducer --> </package> </api>