<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!-- File: hadoop/hadoop-mapreduce/lib/jdiff/hadoop-mapred_0.20.0.xml (listed by the repository viewer as 21033 lines, 874 KiB, XML) -->
<!-- Generated by the JDiff Javadoc doclet -->
<!-- (http://www.jdiff.org) -->
<!-- on Sun May 31 20:42:50 PDT 2009 -->
<!-- Root element of this JDiff API description. The name attribute carries the
     API identifier and has to agree with the -apiname option recorded in the
     command line arguments comment (hadoop-mapred 0.20.0). -->
<api
xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance'
xsi:noNamespaceSchemaLocation='api.xsd'
name="hadoop-mapred 0.20.0"
jdversion="1.0.9">
<!-- Command line arguments = -doclet jdiff.JDiff -docletpath /home/gkesavan/release-0.20.0/build/ivy/lib/Hadoop/jdiff/jdiff-1.0.9.jar:/home/gkesavan/release-0.20.0/build/ivy/lib/Hadoop/jdiff/xerces-1.4.4.jar -classpath /home/gkesavan/release-0.20.0/build/classes:/home/gkesavan/release-0.20.0/lib/commons-cli-2.0-SNAPSHOT.jar:/home/gkesavan/release-0.20.0/lib/hsqldb-1.8.0.10.jar:/home/gkesavan/release-0.20.0/lib/jsp-2.1/jsp-2.1.jar:/home/gkesavan/release-0.20.0/lib/jsp-2.1/jsp-api-2.1.jar:/home/gkesavan/release-0.20.0/lib/kfs-0.2.2.jar:/home/gkesavan/release-0.20.0/conf:/home/gkesavan/.ivy2/cache/commons-logging/commons-logging/jars/commons-logging-1.0.4.jar:/home/gkesavan/.ivy2/cache/log4j/log4j/jars/log4j-1.2.15.jar:/home/gkesavan/.ivy2/cache/commons-httpclient/commons-httpclient/jars/commons-httpclient-3.0.1.jar:/home/gkesavan/.ivy2/cache/commons-codec/commons-codec/jars/commons-codec-1.3.jar:/home/gkesavan/.ivy2/cache/xmlenc/xmlenc/jars/xmlenc-0.52.jar:/home/gkesavan/.ivy2/cache/net.java.dev.jets3t/jets3t/jars/jets3t-0.6.1.jar:/home/gkesavan/.ivy2/cache/commons-net/commons-net/jars/commons-net-1.4.1.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/servlet-api-2.5/jars/servlet-api-2.5-6.1.14.jar:/home/gkesavan/.ivy2/cache/oro/oro/jars/oro-2.0.8.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/jetty/jars/jetty-6.1.14.jar:/home/gkesavan/.ivy2/cache/org.mortbay.jetty/jetty-util/jars/jetty-util-6.1.14.jar:/home/gkesavan/.ivy2/cache/tomcat/jasper-runtime/jars/jasper-runtime-5.5.12.jar:/home/gkesavan/.ivy2/cache/tomcat/jasper-compiler/jars/jasper-compiler-5.5.12.jar:/home/gkesavan/.ivy2/cache/commons-el/commons-el/jars/commons-el-1.0.jar:/home/gkesavan/.ivy2/cache/junit/junit/jars/junit-3.8.1.jar:/home/gkesavan/.ivy2/cache/commons-logging/commons-logging-api/jars/commons-logging-api-1.0.4.jar:/home/gkesavan/.ivy2/cache/org.slf4j/slf4j-api/jars/slf4j-api-1.4.3.jar:/home/gkesavan/.ivy2/cache/org.eclipse.jdt/core/jars/core-3.1.1.jar:/home/gkesavan/.ivy2/cache/org.slf4j/slf4j-log4j12/jars/slf4j-log4j12-1.4.3.jar:/home/gkesavan/.ivy2/cache/jdiff/jdiff/jars/jdiff-1.0.9.jar:/home/gkesavan/.ivy2/cache/xerces/xerces/jars/xerces-1.4.4.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-launcher.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-resolver.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-starteam.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-netrexx.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-testutil.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jai.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-swing.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jmf.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-bcel.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jdepend.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-jsch.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-bsf.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-antlr.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-weblogic.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-junit.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-log4j.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/xercesImpl.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-oro.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-trax.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-nodeps.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-commons-logging.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-apache-regexp.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-stylebook.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-javamail.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/ant-commons-net.jar:/home/gkesavan/tools/apache-ant-1.7.1/lib/xml-apis.jar:/home/gkesavan/tools/jdk1.6.0_07-32bit/lib/tools.jar -sourcepath /home/gkesavan/release-0.20.0/src/mapred:/home/gkesavan/release-0.20.0/src/tools -apidir /home/gkesavan/release-0.20.0/lib/jdiff -apiname hadoop-mapred 0.20.0 -->
<package name="org.apache.hadoop.mapred">
<!-- start class org.apache.hadoop.mapred.ClusterStatus -->
<!-- API record for ClusterStatus: a public, non-abstract, non-final class that
     extends java.lang.Object and implements org.apache.hadoop.io.Writable.
     It lists twelve no-argument get* methods followed by write/readFields;
     every method is recorded as public, non-static and non-synchronized.
     The CDATA sections inside the doc elements hold the Javadoc text captured
     by the doclet; they are data for the comparison and are kept as generated. -->
<class name="ClusterStatus" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<!-- Task tracker counts and names. -->
<method name="getTaskTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of task trackers in the cluster.
@return the number of task trackers in the cluster.]]>
</doc>
</method>
<method name="getActiveTrackerNames" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the names of task trackers in the cluster.
@return the active task trackers in the cluster.]]>
</doc>
</method>
<method name="getBlacklistedTrackerNames" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the names of task trackers in the cluster.
@return the blacklisted task trackers in the cluster.]]>
</doc>
</method>
<method name="getBlacklistedTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of blacklisted task trackers in the cluster.
@return the number of blacklisted task trackers in the cluster.]]>
</doc>
</method>
<method name="getTTExpiryInterval" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the tasktracker expiry interval for the cluster
@return the expiry interval in msec]]>
</doc>
</method>
<!-- Running task counts and maximum task capacities. -->
<method name="getMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of currently running map tasks in the cluster.
@return the number of currently running map tasks in the cluster.]]>
</doc>
</method>
<method name="getReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of currently running reduce tasks in the cluster.
@return the number of currently running reduce tasks in the cluster.]]>
</doc>
</method>
<method name="getMaxMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum capacity for running map tasks in the cluster.
@return the maximum capacity for running map tasks in the cluster.]]>
</doc>
</method>
<method name="getMaxReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum capacity for running reduce tasks in the cluster.
@return the maximum capacity for running reduce tasks in the cluster.]]>
</doc>
</method>
<!-- JobTracker state and heap memory figures. -->
<method name="getJobTrackerState" return="org.apache.hadoop.mapred.JobTracker.State"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the current state of the <code>JobTracker</code>,
as {@link JobTracker.State}
@return the current state of the <code>JobTracker</code>.]]>
</doc>
</method>
<method name="getUsedMemory" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the total heap memory used by the <code>JobTracker</code>
@return the size of heap memory used by the <code>JobTracker</code>]]>
</doc>
</method>
<method name="getMaxMemory" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum configured heap memory that can be used by the <code>JobTracker</code>
@return the configured size of max heap memory that can be used by the <code>JobTracker</code>]]>
</doc>
</method>
<!-- write/readFields pair taking DataOutput / DataInput and declaring IOException.
     No return attribute and no doc element are recorded for either method
     (presumably void; confirm against the JDiff api.xsd). -->
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<!-- Class-level Javadoc. The bare "&" and the HTML tags in this text are legal
     only because they sit inside a CDATA section; do not move them out of it. -->
<doc>
<![CDATA[Status information on the current state of the Map-Reduce cluster.
<p><code>ClusterStatus</code> provides clients with information such as:
<ol>
<li>
Size of the cluster.
</li>
<li>
Name of the trackers.
</li>
<li>
Task capacity of the cluster.
</li>
<li>
The number of currently running map & reduce tasks.
</li>
<li>
State of the <code>JobTracker</code>.
</li>
</ol></p>
<p>Clients can query for the latest <code>ClusterStatus</code>, via
{@link JobClient#getClusterStatus()}.</p>
@see JobClient]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.ClusterStatus -->
<!-- start class org.apache.hadoop.mapred.Counters -->
<!-- API record for Counters: a public, non-abstract class extending
     java.lang.Object that implements org.apache.hadoop.io.Writable and
     java.lang.Iterable. The class itself is recorded as deprecated in favour of
     org.apache.hadoop.mapreduce.Counters. Every instance method listed here is
     recorded as synchronized except log; the two static methods (sum and
     fromEscapedCompactString) are not synchronized. -->
<class name="Counters" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Counters} instead.">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<!-- Public no-argument constructor. -->
<constructor name="Counters"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<!-- Group enumeration and lookup. -->
<method name="getGroupNames" return="java.util.Collection"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the names of all counter classes.
@return Set of counter names.]]>
</doc>
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getGroup" return="org.apache.hadoop.mapred.Counters.Group"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<doc>
<![CDATA[Returns the named counter group, or an empty group if there is none
with the specified name.]]>
</doc>
</method>
<!-- Three findCounter overloads follow: by enum key, by (group, name), and by
     (group, id, name). Only the last one is marked deprecated. -->
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<doc>
<![CDATA[Find the counter for the given enum. The same enum will always return the
same counter.
@param key the counter key
@return the matching counter object]]>
</doc>
</method>
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Find a counter given the group and the name.
@param group the name of the group
@param name the internal name of the counter
@return the counter for that name]]>
</doc>
</method>
<!-- The value "deprecated, no comment" mirrors the empty @deprecated tag in the
     Javadoc below: the method is deprecated but no replacement is named. -->
<method name="findCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<param name="group" type="java.lang.String"/>
<param name="id" type="int"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Find a counter by using strings
@param group the name of the group
@param id the id of the counter within the group (0 to N-1)
@param name the internal name of the counter
@return the counter for that name
@deprecated]]>
</doc>
</method>
<!-- Two incrCounter overloads: by enum key and by (group, counter) names. -->
<method name="incrCounter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the specified counter by the specified amount, creating it if
it didn't already exist.
@param key identifies a counter
@param amount amount by which counter is to be incremented]]>
</doc>
</method>
<method name="incrCounter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="counter" type="java.lang.String"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the specified counter by the specified amount, creating it if
it didn't already exist.
@param group the name of the group
@param counter the internal name of the counter
@param amount amount by which counter is to be incremented]]>
</doc>
</method>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<doc>
<![CDATA[Returns current value of the specified counter, or 0 if the counter
does not exist.]]>
</doc>
</method>
<!-- Aggregation across Counters instances. -->
<method name="incrAllCounters"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Increments multiple counters by their amounts in another Counters
instance.
@param other the other Counters instance]]>
</doc>
</method>
<!-- Static helper (static="true", synchronized="false"). -->
<method name="sum" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="a" type="org.apache.hadoop.mapred.Counters"/>
<param name="b" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Convenience method for computing the sum of two sets of counters.]]>
</doc>
</method>
<method name="size" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the total number of counters, by summing the number of counters
in each group.]]>
</doc>
</method>
<!-- write/readFields pair; the write Javadoc spells out the external format. -->
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write the set of groups.
The external format is:
#groups (groupName group)*
i.e. the number of groups followed by 0 or more groups, where each
group is of the form:
groupDisplayName #counters (false | true counter)*
where each counter is of the form:
name (false | true displayName) value]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read a set of groups.]]>
</doc>
</method>
<!-- The only instance method in this class recorded with synchronized="false". -->
<method name="log"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="log" type="org.apache.commons.logging.Log"/>
<doc>
<![CDATA[Logs the current counter values.
@param log The log to use.]]>
</doc>
</method>
<!-- String renderings of the counters. -->
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return textual representation of the counter values.]]>
</doc>
</method>
<method name="makeCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Convert a counters object into a single line that is easy to parse.
@return the string with "name=value" for each counter and separated by ","]]>
</doc>
</method>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Represent the counter in a textual format that can be converted back to
its object form
@return the string in the following format
{(groupname)(group-displayname)[(countername)(displayname)(value)][][]}{}{}]]>
</doc>
</method>
<!-- Static parser for the escaped compact form; declares java.text.ParseException. -->
<method name="fromEscapedCompactString" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="compactString" type="java.lang.String"/>
<exception name="ParseException" type="java.text.ParseException"/>
<doc>
<![CDATA[Convert a stringified counter representation into a counter object. Note
that the counter can be recovered if its stringified using
{@link #makeEscapedCompactString()}.
@return a Counter]]>
</doc>
</method>
<!-- hashCode/equals are listed without doc elements. -->
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="obj" type="java.lang.Object"/>
</method>
<!-- Class-level Javadoc; its @deprecated tag matches the class deprecated attribute. -->
<doc>
<![CDATA[A set of named counters.
<p><code>Counters</code> represent global counters, defined either by the
Map-Reduce framework or applications. Each <code>Counter</code> can be of
any {@link Enum} type.</p>
<p><code>Counters</code> are bunched into {@link Group}s, each comprising of
counters from a particular <code>Enum</code> class.
@deprecated Use {@link org.apache.hadoop.mapreduce.Counters} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.Counters -->
<!-- start class org.apache.hadoop.mapred.Counters.Counter -->
<!-- API record for Counters.Counter: a public class recorded with static="true"
     that extends org.apache.hadoop.mapreduce.Counter. Three public methods are
     listed; no constructor entry is recorded for this class. -->
<class name="Counters.Counter" extends="org.apache.hadoop.mapreduce.Counter"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<!-- Listed without a return attribute and without a doc element. -->
<method name="setDisplayName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newName" type="java.lang.String"/>
</method>
<!-- In the doc below the text ends with "]]]>": the first "]" belongs to the
     documented format string and the remaining "]]>" closes the CDATA section. -->
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the compact stringified version of the counter in the format
[(actual-name)(display-name)(value)]]]>
</doc>
</method>
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[What is the current value of this counter?
@return the current value]]>
</doc>
</method>
<!-- Class-level Javadoc. -->
<doc>
<![CDATA[A counter record, comprising its name and value.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.Counters.Counter -->
<!-- start class org.apache.hadoop.mapred.Counters.Group -->
<!-- API record for Counters.Group: a public class recorded with static="true"
     that extends java.lang.Object and implements org.apache.hadoop.io.Writable
     and java.lang.Iterable. The name/display-name accessors,
     makeEscapedCompactString and hashCode are recorded as non-synchronized;
     the remaining methods are recorded as synchronized. -->
<class name="Counters.Group" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<!-- Group name and display name accessors. -->
<method name="getName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns raw name of the group. This is the name of the enum class
for this group of counters.]]>
</doc>
</method>
<method name="getDisplayName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns localized name of the group. This is the same as getName() by
default, but different if an appropriate ResourceBundle is found.]]>
</doc>
</method>
<method name="setDisplayName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="displayName" type="java.lang.String"/>
<doc>
<![CDATA[Set the display name]]>
</doc>
</method>
<method name="makeEscapedCompactString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the compact stringified version of the group in the format
{(actual-name)(display-name)(value)[][][]} where [] are compact strings for the
counters within.]]>
</doc>
</method>
<!-- hashCode is recorded as non-synchronized while equals is recorded as
     synchronized; this reflects what the doclet captured. -->
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="obj" type="java.lang.Object"/>
<doc>
<![CDATA[Checks for (content) equality of Groups]]>
</doc>
</method>
<!-- Two getCounter overloads with different return types: getCounter(String)
     returns long, while the deprecated getCounter(int, String) returns a
     Counters.Counter and points callers to getCounter(String). -->
<method name="getCounter" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<doc>
<![CDATA[Returns the value of the specified counter, or 0 if the counter does
not exist.]]>
</doc>
</method>
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="use {@link #getCounter(String)} instead">
<param name="id" type="int"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Get the counter for the given id and create it if it doesn't exist.
@param id the numeric id of the counter within the group
@param name the internal counter name
@return the counter
@deprecated use {@link #getCounter(String)} instead]]>
</doc>
</method>
<method name="getCounterForName" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Get the counter for the given name and create it if it doesn't exist.
@param name the internal counter name
@return the counter]]>
</doc>
</method>
<method name="size" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the number of counters in this group.]]>
</doc>
</method>
<!-- write/readFields pair and iterator, all listed without doc elements. -->
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<!-- Class-level Javadoc. -->
<doc>
<![CDATA[<code>Group</code> of counters, comprising of counters from a particular
counter {@link Enum} class.
<p><code>Group</code>handles localization of the class name and the
counter names.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.Counters.Group -->
<!-- start class org.apache.hadoop.mapred.DefaultJobHistoryParser -->
<!-- API record for DefaultJobHistoryParser: a public, non-abstract class
     extending java.lang.Object, with a public no-argument constructor and a
     single public static method, parseJobTasks. -->
<class name="DefaultJobHistoryParser" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="DefaultJobHistoryParser"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<!-- Static entry point: takes the history file name, a JobHistory.JobInfo and a
     FileSystem, and declares IOException. No return attribute is recorded
     (presumably void; confirm against the JDiff api.xsd). -->
<method name="parseJobTasks"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobHistoryFile" type="java.lang.String"/>
<param name="job" type="org.apache.hadoop.mapred.JobHistory.JobInfo"/>
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Populates a JobInfo object from the job's history log file.
@param jobHistoryFile history file for this job.
@param job a precreated JobInfo object, should be non-null.
@param fs FileSystem where historyFile is present.
@throws IOException]]>
</doc>
</method>
<!-- Class-level Javadoc. -->
<doc>
<![CDATA[Default parser for job history files. It creates object model from
job history file.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.DefaultJobHistoryParser -->
<!-- start class org.apache.hadoop.mapred.FileAlreadyExistsException -->
<!-- API record for FileAlreadyExistsException: a public, non-abstract class
     extending java.io.IOException. Only two public constructors are listed;
     no method entries are recorded for this class. -->
<class name="FileAlreadyExistsException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<!-- No-argument constructor (no type attribute). -->
<constructor name="FileAlreadyExistsException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<!-- Constructor whose parameter list is recorded through the type attribute:
     a single java.lang.String argument. -->
<constructor name="FileAlreadyExistsException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<!-- Class-level Javadoc. -->
<doc>
<![CDATA[Used when target file already exists for any operation and
is not configured to be overwritten.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileAlreadyExistsException -->
<!-- start class org.apache.hadoop.mapred.FileInputFormat -->
<class name="FileInputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}
instead.">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<constructor name="FileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setMinSplitSize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="minSplitSize" type="long"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="filename" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Is the given filename splitable? Usually, true, but if the file is
stream compressed, it will not be.
<code>FileInputFormat</code> implementations can override this and return
<code>false</code> to ensure that individual input files are never split-up
so that {@link Mapper}s process entire files.
@param fs the file system that the file is on
@param filename the file name to check
@return is this file splitable?]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setInputPathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="filter" type="java.lang.Class"/>
<doc>
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
@param filter the PathFilter class use for filtering the input paths.]]>
</doc>
</method>
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
@return the PathFilter instance set for the job, NULL if none has been set.]]>
</doc>
</method>
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[List input directories.
Subclasses may override to, e.g., select only files matching a regular
expression.
@param job the job to list input paths for
@return array of FileStatus objects
@throws IOException if zero items.]]>
</doc>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Splits files returned by {@link #listStatus(JobConf)} when
they're too big.]]>
</doc>
</method>
<method name="computeSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="goalSize" type="long"/>
<param name="minSize" type="long"/>
<param name="blockSize" type="long"/>
</method>
<method name="getBlockIndex" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<doc>
<![CDATA[Sets the given comma separated paths as the list of inputs
for the map-reduce job.
@param conf Configuration of the job
@param commaSeparatedPaths Comma separated paths to be set as
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<doc>
<![CDATA[Add the given comma separated paths to the list of inputs for
the map-reduce job.
@param conf The configuration of the job
@param commaSeparatedPaths Comma separated paths to be added to
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
<doc>
<![CDATA[Set the array of {@link Path}s as the list of inputs
for the map-reduce job.
@param conf Configuration of the job.
@param inputPaths the {@link Path}s of the input directories/files
for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
@param conf The configuration of the job
@param path {@link Path} to be added to the list of inputs for
the map-reduce job.]]>
</doc>
</method>
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
@param conf The configuration of the job
@return the list of input {@link Path}s for the map-reduce job.]]>
</doc>
</method>
<method name="getSplitHosts" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
<param name="splitSize" type="long"/>
<param name="clusterMap" type="org.apache.hadoop.net.NetworkTopology"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This function identifies and returns the hosts that contribute
most for a given split. For calculating the contribution, rack
locality is treated on par with host locality, so hosts from racks
that contribute the most are preferred over hosts on racks that
contribute less
@param blkLocations The list of block locations
@param offset
@param splitSize
@return array of hosts that contribute most to this split
@throws IOException]]>
</doc>
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A base class for file-based {@link InputFormat}.
<p><code>FileInputFormat</code> is the base class for all file-based
<code>InputFormat</code>s. This provides a generic implementation of
{@link #getSplits(JobConf, int)}.
Subclasses of <code>FileInputFormat</code> can also override the
{@link #isSplitable(FileSystem, Path)} method to ensure input-files are
not split-up and are processed as a whole by {@link Mapper}s.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileInputFormat -->
<!-- start class org.apache.hadoop.mapred.FileOutputCommitter -->
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapred.OutputCommitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileOutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TEMP_DIR_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Temporary directory name]]>
</doc>
</field>
<doc>
<![CDATA[An {@link OutputCommitter} that commits files specified
in job output directory i.e. ${mapred.output.dir}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileOutputCommitter -->
<!-- start class org.apache.hadoop.mapred.FileOutputFormat -->
<class name="FileOutputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="FileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setCompressOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="compress" type="boolean"/>
<doc>
<![CDATA[Set whether the output of the job is compressed.
@param conf the {@link JobConf} to modify
@param compress should the output of the job be compressed?]]>
</doc>
</method>
<method name="getCompressOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Is the job output compressed?
@param conf the {@link JobConf} to look in
@return <code>true</code> if the job output should be compressed,
<code>false</code> otherwise]]>
</doc>
</method>
<method name="setOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="codecClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
@param conf the {@link JobConf} to modify
@param codecClass the {@link CompressionCodec} to be used to
compress the job outputs]]>
</doc>
</method>
<method name="getOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="defaultValue" type="java.lang.Class"/>
<doc>
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
@param conf the {@link JobConf} to look in
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} to be used to compress the
job outputs
@throws IllegalArgumentException if the class was specified, but not found]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setOutputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
@param conf The configuration of the job.
@param outputDir the {@link Path} of the output directory for
the map-reduce job.]]>
</doc>
</method>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
@return the {@link Path} to the output directory for the map-reduce job.
@see FileOutputFormat#getWorkOutputPath(JobConf)]]>
</doc>
</method>
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the {@link Path} to the task's temporary output directory
for the map-reduce job
<h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
<p><i>Note:</i> The following is valid only if the {@link OutputCommitter}
is {@link FileOutputCommitter}. If <code>OutputCommitter</code> is not
a <code>FileOutputCommitter</code>, the task's temporary output
directory is same as {@link #getOutputPath(JobConf)} i.e.
<tt>${mapred.output.dir}</tt></p>
<p>Some applications need to create/write-to side-files, which differ from
the actual job-outputs.</p>
<p>In such cases there could be issues with 2 instances of the same TIP
(running simultaneously e.g. speculative tasks) trying to open/write-to the
same file (path) on HDFS. Hence the application-writer will have to pick
unique names per task-attempt (e.g. using the attemptid, say
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
<p>To get around this the Map-Reduce framework helps the application-writer
out by maintaining a special
<tt>${mapred.output.dir}/_temporary/_${taskid}</tt>
sub-directory for each task-attempt on HDFS where the output of the
task-attempt goes. On successful completion of the task-attempt the files
in the <tt>${mapred.output.dir}/_temporary/_${taskid}</tt> (only)
are <i>promoted</i> to <tt>${mapred.output.dir}</tt>. Of course, the
framework discards the sub-directory of unsuccessful task-attempts. This
is completely transparent to the application.</p>
<p>The application-writer can take advantage of this by creating any
side-files required in <tt>${mapred.work.output.dir}</tt> during execution
of the reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the
framework will move them out similarly - thus the writer doesn't have to pick
unique paths per task-attempt.</p>
<p><i>Note</i>: the value of <tt>${mapred.work.output.dir}</tt> during
execution of a particular task-attempt is actually
<tt>${mapred.output.dir}/_temporary/_${taskid}</tt>, and this value is
set by the map-reduce framework. So, just create any side-files in the
path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce
task to take advantage of this feature.</p>
<p>The entire discussion holds true for maps of jobs with
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
goes directly to HDFS.</p>
@return the {@link Path} to the task's temporary output directory
for the map-reduce job.]]>
</doc>
</method>
<method name="getTaskOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Helper function to create the task's temporary output directory and
return the path to the task's output file.
@param conf job-configuration
@param name temporary task-output filename
@return path to the task's temporary output file
@throws IOException]]>
</doc>
</method>
<method name="getUniqueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Helper function to generate a name that is unique for the task.
<p>The generated name can be used to create custom files from within the
different tasks for the job, the names for different tasks will not collide
with each other.</p>
<p>The given name is postfixed with the task type, 'm' for maps, 'r' for
reduces and the task partition number. For example, given a name 'test'
running on the first map of the job, the generated name will be
'test-m-00000'.</p>
@param conf the configuration for the job.
@param name the name to make unique.
@return a unique name across all tasks of the job.]]>
</doc>
</method>
<method name="getPathForCustomFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
the task within the job output directory.
<p>The path can be used to create custom files from within the map and
reduce tasks. The path name will be unique for each task. The path parent
will be the job output directory.</p>
<p>This method uses the {@link #getUniqueName} method to make the file name
unique for the task.</p>
@param conf the configuration for the job.
@param name the name for the file.
@return a unique path across all tasks of the job.]]>
</doc>
</method>
<doc>
<![CDATA[A base class for {@link OutputFormat}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileOutputFormat -->
<!-- start class org.apache.hadoop.mapred.FileSplit -->
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit}
instead.">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="deprecated, no comment">
<doc>
<![CDATA[Constructs a split.
@deprecated
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process]]>
</doc>
</constructor>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a split with host information
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process
@param hosts the list of hosts containing the block, possibly null]]>
</doc>
</constructor>
<method name="getPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The file containing this split's data.]]>
</doc>
</method>
<method name="getStart" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The position of the first byte in the file to process.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The number of bytes in the file to process.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A section of an input file. Returned by {@link
InputFormat#getSplits(JobConf, int)} and passed to
{@link InputFormat#getRecordReader(InputSplit,JobConf,Reporter)}.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.FileSplit}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.FileSplit -->
<!-- start class org.apache.hadoop.mapred.ID -->
<class name="ID" extends="org.apache.hadoop.mapreduce.ID"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ID" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[constructs an ID object from the given int]]>
</doc>
</constructor>
<constructor name="ID"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[A general identifier, which internally stores the id
as an integer. This is the super class of {@link JobID},
{@link TaskID} and {@link TaskAttemptID}.
@see JobID
@see TaskID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.ID -->
<!-- start interface org.apache.hadoop.mapred.InputFormat -->
<interface name="InputFormat" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.">
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Logically split the set of input files for the job.
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
for processing.</p>
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
input files are not physically split into chunks. For example, a split could
be an <i>&lt;input-file-path, start, offset&gt;</i> tuple.</p>
@param job job configuration.
@param numSplits the desired number of splits, a hint.
@return an array of {@link InputSplit}s for the job.]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the {@link RecordReader} for the given {@link InputSplit}.
<p>It is the responsibility of the <code>RecordReader</code> to respect
record boundaries while processing the logical split to present a
record-oriented view to the individual task.</p>
@param split the {@link InputSplit}
@param job the job that this split belongs to
@return a {@link RecordReader}]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputFormat</code> describes the input-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the input-specification of the job.
</li>
<li>
Split-up the input file(s) into logical {@link InputSplit}s, each of
which is then assigned to an individual {@link Mapper}.
</li>
<li>
Provide the {@link RecordReader} implementation to be used to glean
input records from the logical <code>InputSplit</code> for processing by
the {@link Mapper}.
</li>
</ol>
<p>The default behavior of file-based {@link InputFormat}s, typically
sub-classes of {@link FileInputFormat}, is to split the
input into <i>logical</i> {@link InputSplit}s based on the total size, in
bytes, of the input files. However, the {@link FileSystem} blocksize of
the input files is treated as an upper bound for input splits. A lower bound
on the split size can be set via
<a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
mapred.min.split.size</a>.</p>
<p>Clearly, logical splits based on input-size are insufficient for many
applications since record boundaries are to be respected. In such cases, the
application has to also implement a {@link RecordReader} on whom lies the
responsibility to respect record-boundaries and present a record-oriented
view of the logical <code>InputSplit</code> to the individual task.</p>
@see InputSplit
@see RecordReader
@see JobClient
@see FileInputFormat
@deprecated Use {@link org.apache.hadoop.mapreduce.InputFormat} instead.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.InputFormat -->
<!-- start interface org.apache.hadoop.mapred.InputSplit -->
<interface name="InputSplit" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.">
<implements name="org.apache.hadoop.io.Writable"/>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the total number of bytes in the data of the <code>InputSplit</code>.
@return the number of bytes in the input split.
@throws IOException]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the list of hostnames where the input split is located.
@return list of hostnames where data of the <code>InputSplit</code> is
located as an array of <code>String</code>s.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
individual {@link Mapper}.
<p>Typically, it presents a byte-oriented view on the input and it is the
responsibility of {@link RecordReader} of the job to process this and present
a record-oriented view.</p>
@see InputFormat
@see RecordReader
@deprecated Use {@link org.apache.hadoop.mapreduce.InputSplit} instead.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.InputSplit -->
<!-- start class org.apache.hadoop.mapred.InvalidFileTypeException -->
<class name="InvalidFileTypeException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidFileTypeException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="InvalidFileTypeException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Used when file type differs from the desired file type, like
getting a file when a directory is expected, or a wrong file type.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.InvalidFileTypeException -->
<!-- start class org.apache.hadoop.mapred.InvalidInputException -->
<class name="InvalidInputException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidInputException" type="java.util.List"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create the exception with the given list.
@param probs the list of problems to report. this list is not copied.]]>
</doc>
</constructor>
<method name="getProblems" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the complete list of the problems reported.
@return the list of problems, which must not be modified]]>
</doc>
</method>
<method name="getMessage" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get a summary message of the problems found.
@return the concatenated messages from all of the problems.]]>
</doc>
</method>
<doc>
<![CDATA[This class wraps a list of problems with the input, so that the user
can get a list of problems together instead of finding and fixing them one
by one.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.InvalidInputException -->
<!-- start class org.apache.hadoop.mapred.InvalidJobConfException -->
<class name="InvalidJobConfException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidJobConfException"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="InvalidJobConfException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[This exception is thrown when jobconf misses some mandatory attributes
or the value of some attributes is invalid.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.InvalidJobConfException -->
<!-- start class org.apache.hadoop.mapred.IsolationRunner -->
<class name="IsolationRunner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="IsolationRunner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Run a single task
@param args the first argument is the task directory]]>
</doc>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.IsolationRunner -->
<!-- start class org.apache.hadoop.mapred.JobClient -->
<class name="JobClient" extends="org.apache.hadoop.conf.Configured"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.MRConstants"/>
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="JobClient"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job client.]]>
</doc>
</constructor>
<constructor name="JobClient" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Build a job client with the given {@link JobConf}, and connect to the
default {@link JobTracker}.
@param conf the job configuration.
@throws IOException]]>
</doc>
</constructor>
<constructor name="JobClient" type="java.net.InetSocketAddress, org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Build a job client, connect to the indicated job tracker.
@param jobTrackAddr the job tracker to connect to.
@param conf configuration.]]>
</doc>
</constructor>
<method name="init"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Connect to the default {@link JobTracker}.
@param conf the job configuration.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close the <code>JobClient</code>.]]>
</doc>
</method>
<method name="getFs" return="org.apache.hadoop.fs.FileSystem"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get a filesystem handle. We need this to prepare jobs
for submission to the MapReduce system.
@return the filesystem handle.]]>
</doc>
</method>
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobFile" type="java.lang.String"/>
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
<exception name="InvalidJobConfException" type="org.apache.hadoop.mapred.InvalidJobConfException"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the MR system.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param jobFile the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws FileNotFoundException
@throws InvalidJobConfException
@throws IOException]]>
</doc>
</method>
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the MR system.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param job the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws FileNotFoundException
@throws IOException]]>
</doc>
</method>
<method name="submitJobInternal" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="FileNotFoundException" type="java.io.FileNotFoundException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Internal method for submitting jobs to the system.
@param job the configuration to submit
@return a proxy object for the running job
@throws FileNotFoundException
@throws ClassNotFoundException
@throws InterruptedException
@throws IOException]]>
</doc>
</method>
<method name="isJobDirValid" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobDirPath" type="org.apache.hadoop.fs.Path"/>
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Checks if the job directory is clean and has all the required components
for (re) starting the job]]>
</doc>
</method>
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get a {@link RunningJob} object to track an ongoing job. Returns
null if the id does not correspond to any known job.
@param jobid the jobid of the job.
@return the {@link RunningJob} handle to track the job, null if the
<code>jobid</code> doesn't correspond to any known job.
@throws IOException]]>
</doc>
</method>
<method name="getJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getJob(JobID)}.">
<param name="jobid" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #getJob(JobID)}.]]>
</doc>
</method>
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the map tasks of a job.
@param jobId the job to query.
@return the list of all of the map tips.
@throws IOException]]>
</doc>
</method>
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getMapTaskReports(JobID)}">
<param name="jobId" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #getMapTaskReports(JobID)}]]>
</doc>
</method>
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the reduce tasks of a job.
@param jobId the job to query.
@return the list of all of the reduce tips.
@throws IOException]]>
</doc>
</method>
<method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the cleanup tasks of a job.
@param jobId the job to query.
@return the list of all of the cleanup tips.
@throws IOException]]>
</doc>
</method>
<method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the information of the current state of the setup tasks of a job.
@param jobId the job to query.
@return the list of all of the setup tips.
@throws IOException]]>
</doc>
</method>
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #getReduceTaskReports(JobID)}">
<param name="jobId" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #getReduceTaskReports(JobID)}]]>
</doc>
</method>
<method name="displayTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="type" type="java.lang.String"/>
<param name="state" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Display the information about a job's tasks, of a particular type and
in a particular state
@param jobId the ID of the job
@param type the type of the task (map/reduce/setup/cleanup)
@param state the state of the task
(pending/running/completed/failed/killed)]]>
</doc>
</method>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the Map-Reduce cluster.
@return the status information about the Map-Reduce cluster as an object
of {@link ClusterStatus}.
@throws IOException]]>
</doc>
</method>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="detailed" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the Map-Reduce cluster.
@param detailed if true then get a detailed status including the
tracker names
@return the status information about the Map-Reduce cluster as an object
of {@link ClusterStatus}.
@throws IOException]]>
</doc>
</method>
<method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the jobs that are not completed and not failed.
@return array of {@link JobStatus} for the running/to-be-run jobs.
@throws IOException]]>
</doc>
</method>
<method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the jobs that are submitted.
@return array of {@link JobStatus} for the submitted jobs.
@throws IOException]]>
</doc>
</method>
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Utility that submits a job, then polls for progress until the job is
complete.
@param job the job configuration.
@throws IOException if the job fails]]>
</doc>
</method>
<method name="monitorAndPrintJob" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="job" type="org.apache.hadoop.mapred.RunningJob"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Monitor a job and print status in real-time as progress is made and tasks
fail.
@param conf the job's configuration
@param job the job to track
@return true if the job succeeded
@throws IOException if communication to the JobTracker fails]]>
</doc>
</method>
<method name="setTaskOutputFilter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
<doc>
<![CDATA[Sets the output filter for tasks. Only those tasks are printed whose
output matches the filter.
@param newValue task filter.]]>
</doc>
</method>
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the task output filter out of the JobConf.
@param job the JobConf to examine.
@return the filter level.]]>
</doc>
</method>
<method name="setTaskOutputFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="newValue" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"/>
<doc>
<![CDATA[Modify the JobConf to set the task output filter.
@param job the JobConf to modify.
@param newValue the value to set.]]>
</doc>
</method>
<method name="getTaskOutputFilter" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns task output filter.
@return task filter.]]>
</doc>
</method>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<method name="getDefaultMaps" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the max available Maps in the cluster.
@return the max available Maps in the cluster
@throws IOException]]>
</doc>
</method>
<method name="getDefaultReduces" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get status information about the max available Reduces in the cluster.
@return the max available Reduces in the cluster
@throws IOException]]>
</doc>
</method>
<method name="getSystemDir" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Grab the jobtracker system directory path where job-specific files are to be placed.
@return the system directory where job-specific files are to be placed.]]>
</doc>
</method>
<method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return an array of queue information objects about all the Job Queues
configured.
@return Array of JobQueueInfo objects
@throws IOException]]>
</doc>
</method>
<method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets all the jobs which were added to a particular Job Queue
@param queueName name of the Job Queue
@return Array of jobs present in the job queue
@throws IOException]]>
</doc>
</method>
<method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the queue information associated with a particular Job Queue
@param queueName name of the job queue.
@return Queue information associated with a particular queue.
@throws IOException]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<doc>
<![CDATA[<code>JobClient</code> is the primary interface for the user-job to interact
with the {@link JobTracker}.
<code>JobClient</code> provides facilities to submit jobs, track their
progress, access component-tasks' reports/logs, get the Map-Reduce cluster
status information etc.
<p>The job submission process involves:
<ol>
<li>
Checking the input and output specifications of the job.
</li>
<li>
Computing the {@link InputSplit}s for the job.
</li>
<li>
Setting up the requisite accounting information for the {@link DistributedCache}
of the job, if necessary.
</li>
<li>
Copying the job's jar and configuration to the map-reduce system directory
on the distributed file-system.
</li>
<li>
Submitting the job to the <code>JobTracker</code> and optionally monitoring
its status.
</li>
</ol></p>
Normally the user creates the application, describes various facets of the
job via {@link JobConf} and then uses the <code>JobClient</code> to submit
the job and monitor its progress.
<p>Here is an example on how to use <code>JobClient</code>:</p>
<p><blockquote><pre>
// Create a new JobConf
JobConf job = new JobConf(new Configuration(), MyJob.class);
// Specify various job-specific parameters
job.setJobName("myjob");
job.setInputPath(new Path("in"));
job.setOutputPath(new Path("out"));
job.setMapperClass(MyJob.MyMapper.class);
job.setReducerClass(MyJob.MyReducer.class);
// Submit the job, then poll for progress until the job is complete
JobClient.runJob(job);
</pre></blockquote></p>
<h4 id="JobControl">Job Control</h4>
<p>At times clients would chain map-reduce jobs to accomplish complex tasks
which cannot be done via a single map-reduce job. This is fairly easy since
the output of the job, typically, goes to distributed file-system and that
can be used as the input for the next job.</p>
<p>However, this also means that the onus on ensuring jobs are complete
(success/failure) lies squarely on the clients. In such situations the
various job-control options are:
<ol>
<li>
{@link #runJob(JobConf)} : submits the job and returns only after
the job has completed.
</li>
<li>
{@link #submitJob(JobConf)} : only submits the job, then poll the
returned handle to the {@link RunningJob} to query status and make
scheduling decisions.
</li>
<li>
{@link JobConf#setJobEndNotificationURI(String)} : set up a notification
on job-completion, thus avoiding polling.
</li>
</ol></p>
@see JobConf
@see ClusterStatus
@see Tool
@see DistributedCache]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobClient -->
<!-- start class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
<class name="JobClient.TaskStatusFilter" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="NONE" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="KILLED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SUCCEEDED" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="ALL" type="org.apache.hadoop.mapred.JobClient.TaskStatusFilter"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.JobClient.TaskStatusFilter -->
<!-- start class org.apache.hadoop.mapred.JobConf -->
<class name="JobConf" extends="org.apache.hadoop.conf.Configuration"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link Configuration} instead">
<constructor name="JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.]]>
</doc>
</constructor>
<constructor name="JobConf" type="java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.
@param exampleClass a class whose containing jar is used as the job's jar.]]>
</doc>
</constructor>
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.
@param conf a Configuration whose settings will be inherited.]]>
</doc>
</constructor>
<constructor name="JobConf" type="org.apache.hadoop.conf.Configuration, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce job configuration.
@param conf a Configuration whose settings will be inherited.
@param exampleClass a class whose containing jar is used as the job's jar.]]>
</doc>
</constructor>
<constructor name="JobConf" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce configuration.
@param config a Configuration-format XML job description file.]]>
</doc>
</constructor>
<constructor name="JobConf" type="org.apache.hadoop.fs.Path"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a map/reduce configuration.
@param config a Configuration-format XML job description file.]]>
</doc>
</constructor>
<constructor name="JobConf" type="boolean"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A new map/reduce configuration where the behavior of reading from the
default resources can be turned off.
<p/>
If the parameter {@code loadDefaults} is false, the new instance
will not load resources from the default files.
@param loadDefaults specifies whether to load from the default files]]>
</doc>
</constructor>
<method name="getJar" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user jar for the map-reduce job.
@return the user jar for the map-reduce job.]]>
</doc>
</method>
<method name="setJar"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jar" type="java.lang.String"/>
<doc>
<![CDATA[Set the user jar for the map-reduce job.
@param jar the user jar for the map-reduce job.]]>
</doc>
</method>
<method name="setJarByClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<doc>
<![CDATA[Set the job's jar file by finding an example class location.
@param cls the example class.]]>
</doc>
</method>
<method name="getLocalDirs" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="deleteLocalFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="deleteLocalFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="subdir" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocalPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="pathString" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Constructs a local file name. Files are distributed among configured
local directories.]]>
</doc>
</method>
<method name="getUser" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the reported username for this job.
@return the username]]>
</doc>
</method>
<method name="setUser"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="user" type="java.lang.String"/>
<doc>
<![CDATA[Set the reported username for this job.
@param user the username for this job.]]>
</doc>
</method>
<method name="setKeepFailedTaskFiles"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="keep" type="boolean"/>
<doc>
<![CDATA[Set whether the framework should keep the intermediate files for
failed tasks.
@param keep <code>true</code> if framework should keep the intermediate files
for failed tasks, <code>false</code> otherwise.]]>
</doc>
</method>
<method name="getKeepFailedTaskFiles" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should the temporary files for failed tasks be kept?
@return should the files be kept?]]>
</doc>
</method>
<method name="setKeepTaskFilesPattern"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="pattern" type="java.lang.String"/>
<doc>
<![CDATA[Set a regular expression for task names that should be kept.
The regular expression ".*_m_000123_0" would keep the files
for the first instance of map 123 that ran.
@param pattern the java.util.regex.Pattern to match against the
task names.]]>
</doc>
</method>
<method name="getKeepTaskFilesPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the regular expression that is matched against the task names
to see if we need to keep the files.
@return the pattern as a string, if it was set, otherwise null.]]>
</doc>
</method>
<method name="setWorkingDirectory"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the current working directory for the default file system.
@param dir the new current working directory.]]>
</doc>
</method>
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the current working directory for the default file system.
@return the directory name.]]>
</doc>
</method>
<method name="setNumTasksToExecutePerJvm"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="numTasks" type="int"/>
<doc>
<![CDATA[Sets the number of tasks that a spawned task JVM should run
before it exits
@param numTasks the number of tasks to execute; defaults to 1;
-1 signifies no limit]]>
</doc>
</method>
<method name="getNumTasksToExecutePerJvm" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the number of tasks that a spawned JVM should execute]]>
</doc>
</method>
<method name="getInputFormat" return="org.apache.hadoop.mapred.InputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link InputFormat} implementation for the map-reduce job,
defaults to {@link TextInputFormat} if not specified explicitly.
@return the {@link InputFormat} implementation for the map-reduce job.]]>
</doc>
</method>
<method name="setInputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link InputFormat} implementation for the map-reduce job.
@param theClass the {@link InputFormat} implementation for the map-reduce
job.]]>
</doc>
</method>
<method name="getOutputFormat" return="org.apache.hadoop.mapred.OutputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link OutputFormat} implementation for the map-reduce job,
defaults to {@link TextOutputFormat} if not specified explicitly.
@return the {@link OutputFormat} implementation for the map-reduce job.]]>
</doc>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapred.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link OutputCommitter} implementation for the map-reduce job,
defaults to {@link FileOutputCommitter} if not specified explicitly.
@return the {@link OutputCommitter} implementation for the map-reduce job.]]>
</doc>
</method>
<method name="setOutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link OutputCommitter} implementation for the map-reduce job.
@param theClass the {@link OutputCommitter} implementation for the map-reduce
job.]]>
</doc>
</method>
<method name="setOutputFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link OutputFormat} implementation for the map-reduce job.
@param theClass the {@link OutputFormat} implementation for the map-reduce
job.]]>
</doc>
</method>
<method name="setCompressMapOutput"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="compress" type="boolean"/>
<doc>
<![CDATA[Should the map outputs be compressed before transfer?
Uses the SequenceFile compression.
@param compress should the map outputs be compressed?]]>
</doc>
</method>
<method name="getCompressMapOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Are the outputs of the maps to be compressed?
@return <code>true</code> if the outputs of the maps are to be compressed,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setMapOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="codecClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the given class as the {@link CompressionCodec} for the map outputs.
@param codecClass the {@link CompressionCodec} class that will compress
the map outputs.]]>
</doc>
</method>
<method name="getMapOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="defaultValue" type="java.lang.Class"/>
<doc>
<![CDATA[Get the {@link CompressionCodec} for compressing the map outputs.
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} class that should be used to compress the
map outputs.
@throws IllegalArgumentException if the class was specified, but not found]]>
</doc>
</method>
<method name="getMapOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the map output data. If it is not set, use the
(final) output key class. This allows the map output key class to be
different than the final output key class.
@return the map output key class.]]>
</doc>
</method>
<method name="setMapOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the key class for the map output data. This allows the user to
specify the map output key class to be different than the final output
key class.
@param theClass the map output key class.]]>
</doc>
</method>
<method name="getMapOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for the map output data. If it is not set, use the
(final) output value class. This allows the map output value class to be
different than the final output value class.
@return the map output value class.]]>
</doc>
</method>
<method name="setMapOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the value class for the map output data. This allows the user to
specify the map output value class to be different than the final output
value class.
@param theClass the map output value class.]]>
</doc>
</method>
<method name="getOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the job output data.
@return the key class for the job output data.]]>
</doc>
</method>
<method name="setOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the key class for the job output data.
@param theClass the key class for the job output data.]]>
</doc>
</method>
<method name="getOutputKeyComparator" return="org.apache.hadoop.io.RawComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
@return the {@link RawComparator} comparator used to compare keys.]]>
</doc>
</method>
<method name="setOutputKeyComparatorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link RawComparator} comparator used to compare keys.
@param theClass the {@link RawComparator} comparator used to
compare keys.
@see #setOutputValueGroupingComparator(Class)]]>
</doc>
</method>
<method name="setKeyFieldComparatorOptions"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="keySpec" type="java.lang.String"/>
<doc>
<![CDATA[Set the {@link KeyFieldBasedComparator} options used to compare keys.
@param keySpec the key specification of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character positions are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field). opts are ordering options. The supported options
are:
-n, (Sort numerically)
-r, (Reverse the result of comparison)]]>
</doc>
</method>
<method name="getKeyFieldComparatorOption" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link KeyFieldBasedComparator} options]]>
</doc>
</method>
<method name="setKeyFieldPartitionerOptions"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="keySpec" type="java.lang.String"/>
<doc>
<![CDATA[Set the {@link KeyFieldBasedPartitioner} options used for
{@link Partitioner}
@param keySpec the key specification of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character positions are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field).]]>
</doc>
</method>
<method name="getKeyFieldPartitionerOption" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link KeyFieldBasedPartitioner} options]]>
</doc>
</method>
<method name="getOutputValueGroupingComparator" return="org.apache.hadoop.io.RawComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user defined {@link RawComparator} comparator for
grouping keys of inputs to the reduce.
@return comparator set by the user for grouping values.
@see #setOutputValueGroupingComparator(Class) for details.]]>
</doc>
</method>
<method name="setOutputValueGroupingComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the user defined {@link RawComparator} comparator for
grouping keys in the input to the reduce.
<p>This comparator should be provided if the equivalence rules for keys
for sorting the intermediates are different from those for grouping keys
before each call to
{@link Reducer#reduce(Object, java.util.Iterator, OutputCollector, Reporter)}.</p>
<p>For key-value pairs (K1,V1) and (K2,V2), the values (V1, V2) are passed
in a single call to the reduce function if K1 and K2 compare as equal.</p>
<p>Since {@link #setOutputKeyComparatorClass(Class)} can be used to control
how keys are sorted, this can be used in conjunction to simulate
<i>secondary sort on values</i>.</p>
<p><i>Note</i>: This is not a guarantee of the reduce sort being
<i>stable</i> in any sense. (In any case, with the order of available
map-outputs to the reduce being non-deterministic, it wouldn't make
that much sense.)</p>
@param theClass the comparator class to be used for grouping keys.
It should implement <code>RawComparator</code>.
@see #setOutputKeyComparatorClass(Class)]]>
</doc>
</method>
<method name="getUseNewMapper" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should the framework use the new context-object code for running
the mapper?
@return true, if the new api should be used]]>
</doc>
</method>
<method name="setUseNewMapper"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="flag" type="boolean"/>
<doc>
<![CDATA[Set whether the framework should use the new api for the mapper.
This is the default for jobs submitted with the new Job api.
@param flag true, if the new api should be used]]>
</doc>
</method>
<method name="getUseNewReducer" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should the framework use the new context-object code for running
the reducer?
@return true, if the new api should be used]]>
</doc>
</method>
<method name="setUseNewReducer"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="flag" type="boolean"/>
<doc>
<![CDATA[Set whether the framework should use the new api for the reducer.
This is the default for jobs submitted with the new Job api.
@param flag true, if the new api should be used]]>
</doc>
</method>
<method name="getOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for job outputs.
@return the value class for job outputs.]]>
</doc>
</method>
<method name="setOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the value class for job outputs.
@param theClass the value class for job outputs.]]>
</doc>
</method>
<method name="getMapperClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link Mapper} class for the job.
@return the {@link Mapper} class for the job.]]>
</doc>
</method>
<method name="setMapperClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link Mapper} class for the job.
@param theClass the {@link Mapper} class for the job.]]>
</doc>
</method>
<method name="getMapRunnerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link MapRunnable} class for the job.
@return the {@link MapRunnable} class for the job.]]>
</doc>
</method>
<method name="setMapRunnerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Expert: Set the {@link MapRunnable} class for the job.
Typically used to exert greater control on {@link Mapper}s.
@param theClass the {@link MapRunnable} class for the job.]]>
</doc>
</method>
<method name="getPartitionerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link Partitioner} used to partition {@link Mapper}-outputs
to be sent to the {@link Reducer}s.
@return the {@link Partitioner} used to partition map-outputs.]]>
</doc>
</method>
<method name="setPartitionerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link Partitioner} class used to partition
{@link Mapper}-outputs to be sent to the {@link Reducer}s.
@param theClass the {@link Partitioner} used to partition map-outputs.]]>
</doc>
</method>
<method name="getReducerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link Reducer} class for the job.
@return the {@link Reducer} class for the job.]]>
</doc>
</method>
<method name="setReducerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link Reducer} class for the job.
@param theClass the {@link Reducer} class for the job.]]>
</doc>
</method>
<method name="getCombinerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-defined <i>combiner</i> class used to combine map-outputs
before being sent to the reducers. Typically the combiner is the same as the
{@link Reducer} for the job i.e. {@link #getReducerClass()}.
@return the user-defined combiner class used to combine map-outputs.]]>
</doc>
</method>
<method name="setCombinerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the user-defined <i>combiner</i> class used to combine map-outputs
before being sent to the reducers.
<p>The combiner is an application-specified aggregation operation, which
can help cut down the amount of data transferred between the
{@link Mapper} and the {@link Reducer}, leading to better performance.</p>
<p>The framework may invoke the combiner 0, 1, or multiple times, in both
the mapper and reducer tasks. In general, the combiner is called as the
sort/merge result is written to disk. The combiner must:
<ul>
<li> be side-effect free</li>
<li> have the same input and output key types and the same input and
output value types</li>
</ul></p>
<p>Typically the combiner is same as the <code>Reducer</code> for the
job i.e. {@link #setReducerClass(Class)}.</p>
@param theClass the user-defined combiner class used to combine
map-outputs.]]>
</doc>
</method>
<method name="getSpeculativeExecution" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should speculative execution be used for this job?
Defaults to <code>true</code>.
@return <code>true</code> if speculative execution should be used for this job,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on, else <code>false</code>.]]>
</doc>
</method>
<method name="getMapSpeculativeExecution" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should speculative execution be used for this job for map tasks?
Defaults to <code>true</code>.
@return <code>true</code> if speculative execution should be
used for this job for map tasks,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setMapSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job for map tasks.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on for map tasks,
else <code>false</code>.]]>
</doc>
</method>
<method name="getReduceSpeculativeExecution" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Should speculative execution be used for this job for reduce tasks?
Defaults to <code>true</code>.
@return <code>true</code> if speculative execution should be used
for reduce tasks for this job,
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setReduceSpeculativeExecution"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="speculativeExecution" type="boolean"/>
<doc>
<![CDATA[Turn speculative execution on or off for this job for reduce tasks.
@param speculativeExecution <code>true</code> if speculative execution
should be turned on for reduce tasks,
else <code>false</code>.]]>
</doc>
</method>
<method name="getNumMapTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of map tasks for this job.
Defaults to <code>1</code>.
@return the number of map tasks for this job.]]>
</doc>
</method>
<method name="setNumMapTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Set the number of map tasks for this job.
<p><i>Note</i>: This is only a <i>hint</i> to the framework. The actual
number of spawned map tasks depends on the number of {@link InputSplit}s
generated by the job's {@link InputFormat#getSplits(JobConf, int)}.
A custom {@link InputFormat} is typically used to accurately control
the number of map tasks for the job.</p>
<h4 id="NoOfMaps">How many maps?</h4>
<p>The number of maps is usually driven by the total size of the inputs
i.e. total number of blocks of the input files.</p>
<p>The right level of parallelism for maps seems to be around 10-100 maps
per-node, although it has been set up to 300 or so for very cpu-light map
tasks. Task setup takes a while, so it is best if the maps take at least a
minute to execute.</p>
<p>The default behavior of file-based {@link InputFormat}s is to split the
input into <i>logical</i> {@link InputSplit}s based on the total size, in
bytes, of input files. However, the {@link FileSystem} blocksize of the
input files is treated as an upper bound for input splits. A lower bound
on the split size can be set via
<a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
mapred.min.split.size</a>.</p>
<p>Thus, if you expect 10TB of input data and have a blocksize of 128MB,
you'll end up with 82,000 maps, unless {@link #setNumMapTasks(int)} is
used to set it even higher.</p>
@param n the number of map tasks for this job.
@see InputFormat#getSplits(JobConf, int)
@see FileInputFormat
@see FileSystem#getDefaultBlockSize()
@see FileStatus#getBlockSize()]]>
</doc>
</method>
<method name="getNumReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
<code>1</code>.
@return the number of reduce tasks for this job.]]>
</doc>
</method>
<method name="setNumReduceTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Set the requisite number of reduce tasks for this job.
<h4 id="NoOfReduces">How many reduces?</h4>
<p>The right number of reduces seems to be <code>0.95</code> or
<code>1.75</code> multiplied by (&lt;<i>no. of nodes</i>&gt; *
<a href="{@docRoot}/../mapred-default.html#mapred.tasktracker.reduce.tasks.maximum">
mapred.tasktracker.reduce.tasks.maximum</a>).
</p>
<p>With <code>0.95</code> all of the reduces can launch immediately and
start transferring map outputs as the maps finish. With <code>1.75</code>
the faster nodes will finish their first round of reduces and launch a
second wave of reduces doing a much better job of load balancing.</p>
<p>Increasing the number of reduces increases the framework overhead, but
increases load balancing and lowers the cost of failures.</p>
<p>The scaling factors above are slightly less than whole numbers to
reserve a few reduce slots in the framework for speculative-tasks, failures
etc.</p>
<h4 id="ReducerNone">Reducer NONE</h4>
<p>It is legal to set the number of reduce-tasks to <code>zero</code>.</p>
<p>In this case the outputs of the map-tasks go directly to the distributed
file-system, to the path set by
{@link FileOutputFormat#setOutputPath(JobConf, Path)}. Also, the
framework doesn't sort the map-outputs before writing them out to HDFS.</p>
@param n the number of reduce tasks for this job.]]>
</doc>
</method>
<method name="getMaxMapAttempts" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of maximum attempts that will be made to run a
map task, as specified by the <code>mapred.map.max.attempts</code>
property. If this property is not already set, the default is 4 attempts.
@return the max number of attempts per map task.]]>
</doc>
</method>
<method name="setMaxMapAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
map task.
@param n the number of attempts per map task.]]>
</doc>
</method>
<method name="getMaxReduceAttempts" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of maximum attempts that will be made to run a
reduce task, as specified by the <code>mapred.reduce.max.attempts</code>
property. If this property is not already set, the default is 4 attempts.
@return the max number of attempts per reduce task.]]>
</doc>
</method>
<method name="setMaxReduceAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="int"/>
<doc>
<![CDATA[Expert: Set the number of maximum attempts that will be made to run a
reduce task.
@param n the number of attempts per reduce task.]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified job name. This is only used to identify the
job to the user.
@return the job's name, defaulting to "".]]>
</doc>
</method>
<method name="setJobName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Set the user-specified job name.
@param name the job's new name.]]>
</doc>
</method>
<method name="getSessionId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified session identifier. The default is the empty string.
The session identifier is used to tag metric data that is reported to some
performance metrics system via the org.apache.hadoop.metrics API. The
session identifier is intended, in particular, for use by Hadoop-On-Demand
(HOD) which allocates a virtual Hadoop cluster dynamically and transiently.
HOD will set the session identifier by modifying the mapred-site.xml file
before starting the cluster.
When not running under HOD, this identifier is expected to remain set to
the empty string.
@return the session identifier, defaulting to "".]]>
</doc>
</method>
<method name="setSessionId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="sessionId" type="java.lang.String"/>
<doc>
<![CDATA[Set the user-specified session identifier.
@param sessionId the new session id.]]>
</doc>
</method>
<method name="setMaxTaskFailuresPerTracker"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="noFailures" type="int"/>
<doc>
<![CDATA[Set the maximum no. of failures of a given job per tasktracker.
If the no. of task failures exceeds <code>noFailures</code>, the
tasktracker is <i>blacklisted</i> for this job.
@param noFailures maximum no. of failures of a given job per tasktracker.]]>
</doc>
</method>
<method name="getMaxTaskFailuresPerTracker" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Expert: Get the maximum no. of failures of a given job per tasktracker.
If the no. of task failures exceeds this, the tasktracker is
<i>blacklisted</i> for this job.
@return the maximum no. of failures of a given job per tasktracker.]]>
</doc>
</method>
<method name="getMaxMapTaskFailuresPercent" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum percentage of map tasks that can fail without
the job being aborted.
Each map task is executed a minimum of {@link #getMaxMapAttempts()}
attempts before being declared as <i>failed</i>.
Defaults to <code>zero</code>, i.e. <i>any</i> failed map-task results in
the job being declared as {@link JobStatus#FAILED}.
@return the maximum percentage of map tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="setMaxMapTaskFailuresPercent"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="percent" type="int"/>
<doc>
<![CDATA[Expert: Set the maximum percentage of map tasks that can fail without the
job being aborted.
Each map task is executed a minimum of {@link #getMaxMapAttempts} attempts
before being declared as <i>failed</i>.
@param percent the maximum percentage of map tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="getMaxReduceTaskFailuresPercent" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the maximum percentage of reduce tasks that can fail without
the job being aborted.
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
attempts before being declared as <i>failed</i>.
Defaults to <code>zero</code>, i.e. <i>any</i> failed reduce-task results
in the job being declared as {@link JobStatus#FAILED}.
@return the maximum percentage of reduce tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="setMaxReduceTaskFailuresPercent"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="percent" type="int"/>
<doc>
<![CDATA[Set the maximum percentage of reduce tasks that can fail without the job
being aborted.
Each reduce task is executed a minimum of {@link #getMaxReduceAttempts()}
attempts before being declared as <i>failed</i>.
@param percent the maximum percentage of reduce tasks that can fail without
the job being aborted.]]>
</doc>
</method>
<method name="setJobPriority"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="prio" type="org.apache.hadoop.mapred.JobPriority"/>
<doc>
<![CDATA[Set {@link JobPriority} for this job.
@param prio the {@link JobPriority} for this job.]]>
</doc>
</method>
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link JobPriority} for this job.
@return the {@link JobPriority} for this job.]]>
</doc>
</method>
<method name="getProfileEnabled" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get whether the task profiling is enabled.
@return true if some tasks will be profiled]]>
</doc>
</method>
<method name="setProfileEnabled"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newValue" type="boolean"/>
<doc>
<![CDATA[Set whether the system should collect profiler information for some of
the tasks in this job. The information is stored in the user log
directory.
@param newValue true means it should be gathered]]>
</doc>
</method>
<method name="getProfileParams" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the profiler configuration arguments.
The default value for this property is
"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s"
@return the parameters to pass to the task child to configure profiling]]>
</doc>
</method>
<method name="setProfileParams"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="value" type="java.lang.String"/>
<doc>
<![CDATA[Set the profiler configuration arguments. If the string contains a '%s' it
will be replaced with the name of the profiling output file when the task
runs.
This value is passed to the task child JVM on the command line.
@param value the configuration string]]>
</doc>
</method>
<method name="getProfileTaskRange" return="org.apache.hadoop.conf.Configuration.IntegerRanges"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="isMap" type="boolean"/>
<doc>
<![CDATA[Get the range of maps or reduces to profile.
@param isMap is the task a map?
@return the task ranges]]>
</doc>
</method>
<method name="setProfileTaskRange"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="isMap" type="boolean"/>
<param name="newValue" type="java.lang.String"/>
<doc>
<![CDATA[Set the ranges of maps or reduces to profile. setProfileEnabled(true)
must also be called.
@param isMap is the task a map?
@param newValue a set of integer ranges of the map ids]]>
</doc>
</method>
<method name="setMapDebugScript"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mDbgScript" type="java.lang.String"/>
<doc>
<![CDATA[Set the debug script to run when the map tasks fail.
<p>The debug script can aid debugging of failed map tasks. The script is
given task's stdout, stderr, syslog, jobconf files as arguments.</p>
<p>The debug command, run on the node where the map failed, is:</p>
<p><pre><blockquote>
$script $stdout $stderr $syslog $jobconf.
</blockquote></pre></p>
<p> The script file is distributed through {@link DistributedCache}
APIs. The script needs to be symlinked. </p>
<p>Here is an example on how to submit a script:</p>
<p><blockquote><pre>
job.setMapDebugScript("./myscript");
DistributedCache.createSymlink(job);
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
</pre></blockquote></p>
@param mDbgScript the script name]]>
</doc>
</method>
<method name="getMapDebugScript" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the map task's debug script.
@return the debug Script for the mapred job for failed map tasks.
@see #setMapDebugScript(String)]]>
</doc>
</method>
<method name="setReduceDebugScript"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rDbgScript" type="java.lang.String"/>
<doc>
<![CDATA[Set the debug script to run when the reduce tasks fail.
<p>The debug script can aid debugging of failed reduce tasks. The script
is given task's stdout, stderr, syslog, jobconf files as arguments.</p>
<p>The debug command, run on the node where the reduce failed, is:</p>
<p><pre><blockquote>
$script $stdout $stderr $syslog $jobconf.
</blockquote></pre></p>
<p> The script file is distributed through {@link DistributedCache}
APIs. The script file needs to be symlinked. </p>
<p>Here is an example of how to submit a script:</p>
<p><blockquote><pre>
job.setReduceDebugScript("./myscript");
DistributedCache.createSymlink(job);
DistributedCache.addCacheFile("/debug/scripts/myscript#myscript");
</pre></blockquote></p>
@param rDbgScript the script name]]>
</doc>
</method>
<method name="getReduceDebugScript" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the reduce task's debug script.
@return the debug script for the mapred job for failed reduce tasks.
@see #setReduceDebugScript(String)]]>
</doc>
</method>
<method name="getJobEndNotificationURI" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the uri to be invoked in order to send a notification after the job
has completed (success/failure).
@return the job end notification uri, <code>null</code> if it hasn't
been set.
@see #setJobEndNotificationURI(String)]]>
</doc>
</method>
<method name="setJobEndNotificationURI"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="uri" type="java.lang.String"/>
<doc>
<![CDATA[Set the uri to be invoked in order to send a notification after the job
has completed (success/failure).
<p>The uri can contain 2 special parameters: <tt>$jobId</tt> and
<tt>$jobStatus</tt>. Those, if present, are replaced by the job's
identifier and completion-status respectively.</p>
<p>This is typically used by application-writers to implement chaining of
Map-Reduce jobs in an <i>asynchronous manner</i>.</p>
@param uri the job end notification uri
@see JobStatus
@see <a href="{@docRoot}/org/apache/hadoop/mapred/JobClient.html#JobCompletionAndChaining">Job Completion and Chaining</a>]]>
</doc>
</method>
<method name="getJobLocalDir" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get job-specific shared directory for use as scratch space
<p>
When a job starts, a shared directory is created at location
<code>
${mapred.local.dir}/taskTracker/jobcache/$jobid/work/ </code>.
This directory is exposed to the users through
<code>job.local.dir </code>.
So, the tasks can use this space
as scratch space and share files among them. </p>
This value is available as System property also.
@return The localized job specific shared directory]]>
</doc>
</method>
<method name="getMaxVirtualMemoryForTask" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The maximum amount of memory any task of this job will use. See
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
@return The maximum amount of memory any task of this job will use, in
bytes.
@see #setMaxVirtualMemoryForTask(long)]]>
</doc>
</method>
<method name="setMaxVirtualMemoryForTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="vmem" type="long"/>
<doc>
<![CDATA[Set the maximum amount of memory any task of this job can use. See
{@link #MAPRED_TASK_MAXVMEM_PROPERTY}
@param vmem Maximum amount of virtual memory in bytes any task of this job
can use.
@see #getMaxVirtualMemoryForTask()]]>
</doc>
</method>
<method name="getMaxPhysicalMemoryForTask" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The maximum amount of physical memory any task of this job will use. See
{@link #MAPRED_TASK_MAXPMEM_PROPERTY}
@return The maximum amount of physical memory any task of this job will
use, in bytes.
@see #setMaxPhysicalMemoryForTask(long)]]>
</doc>
</method>
<method name="setMaxPhysicalMemoryForTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="pmem" type="long"/>
<doc>
<![CDATA[Set the maximum amount of physical memory any task of this job can use. See
{@link #MAPRED_TASK_MAXPMEM_PROPERTY}
@param pmem Maximum amount of physical memory in bytes any task of this job
can use.
@see #getMaxPhysicalMemoryForTask()]]>
</doc>
</method>
<method name="getQueueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the name of the queue to which this job is submitted.
Defaults to 'default'.
@return name of the queue]]>
</doc>
</method>
<method name="setQueueName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<doc>
<![CDATA[Set the name of the queue to which this job should be submitted.
@param queueName Name of the queue]]>
</doc>
</method>
<field name="DISABLED_MEMORY_LIMIT" type="long"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A value which if set for memory related configuration options,
indicates that the options are turned off.]]>
</doc>
</field>
<field name="DEFAULT_QUEUE_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Name of the queue to which jobs will be submitted, if no queue
name is mentioned.]]>
</doc>
</field>
<field name="MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Cluster-wide configuration to be set by the administrators that provides
default amount of maximum virtual memory for job's tasks. This has to be
set on both the JobTracker node for the sake of scheduling decisions and on
the TaskTracker nodes for the sake of memory management.
<p>
If a job doesn't specify its virtual memory requirement by setting
{@link #MAPRED_TASK_MAXVMEM_PROPERTY} to {@link #DISABLED_MEMORY_LIMIT},
tasks are assured a memory limit set to this property. This property is
disabled by default, and if not explicitly set to a valid value by the
administrators and if a job doesn't specify its virtual memory
requirements, the job's tasks will not be assured anything and may be
killed by a TT that intends to control the total memory usage of the tasks
via memory management functionality.
<p>
This value should in general be less than the cluster-wide configuration
{@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY}. If not, or if it is not set,
TaskTracker's memory management may be disabled and a scheduler's memory
based scheduling decisions will be affected. Please refer to the
documentation of the configured scheduler to see how this property is used.]]>
</doc>
</field>
<field name="MAPRED_TASK_MAXVMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The maximum amount of memory any task of this job will use.
<p>
This value will be used by TaskTrackers for monitoring the memory usage of
tasks of this job. If a TaskTracker's memory management functionality is
enabled, each task of this job will be allowed to use a maximum virtual
memory specified by this property. If the task's memory usage goes over
this value, the task will be failed by the TT. If not set, the cluster-wide
configuration {@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} is used as the
default value for memory requirements. If this property cascaded with
{@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} becomes equal to -1, job's
tasks will not be assured anything and may be killed by a TT that intends
to control the total memory usage of the tasks via memory management
functionality. If the memory management functionality is disabled on a TT,
this value is ignored.
<p>
This value should also be not more than the cluster-wide configuration
{@link #UPPER_LIMIT_ON_TASK_VMEM_PROPERTY} which has to be set by the site
administrators.
<p>
This value may be used by schedulers that support scheduling based on job's
memory requirements. In general, a task of this job will be scheduled on a
TaskTracker only if the amount of virtual memory still unoccupied on the
TaskTracker is greater than or equal to this value. But different
schedulers can take different decisions. Please refer to the documentation
of the scheduler being configured to see if it does memory based scheduling
and if it does, how this property is used by that scheduler.
@see #setMaxVirtualMemoryForTask(long)
@see #getMaxVirtualMemoryForTask()]]>
</doc>
</field>
<field name="MAPRED_TASK_MAXPMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The maximum amount of physical memory any task of a job will use.
<p>
This value may be used by schedulers that support scheduling based on job's
memory requirements. In general, a task of this job will be scheduled on a
TaskTracker, only if the amount of physical memory still unoccupied on the
TaskTracker is greater than or equal to this value. But different
schedulers can take different decisions. Please refer to the documentation
of the scheduler being configured to see how it does memory based
scheduling and how this variable is used by that scheduler.
@see #setMaxPhysicalMemoryForTask(long)
@see #getMaxPhysicalMemoryForTask()]]>
</doc>
</field>
<field name="UPPER_LIMIT_ON_TASK_VMEM_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Cluster-wide configuration to be set by the site administrators that
provides an upper limit on the maximum virtual memory that can be specified
by a job. The job configuration {@link #MAPRED_TASK_MAXVMEM_PROPERTY} and
the cluster-wide configuration
{@link #MAPRED_TASK_DEFAULT_MAXVMEM_PROPERTY} should, by definition, be
less than this value. If the job configuration
{@link #MAPRED_TASK_MAXVMEM_PROPERTY} is more than this value,
depending on the scheduler being configured, the job may be rejected or the
job configuration may just be ignored.
<p>
If it is not set on a TaskTracker, TaskTracker's memory management will be
disabled.]]>
</doc>
</field>
<doc>
<![CDATA[A map/reduce job configuration.
<p><code>JobConf</code> is the primary interface for a user to describe a
map-reduce job to the Hadoop framework for execution. The framework tries to
faithfully execute the job as-is described by <code>JobConf</code>, however:
<ol>
<li>
Some configuration parameters might have been marked as
<a href="{@docRoot}/org/apache/hadoop/conf/Configuration.html#FinalParams">
final</a> by administrators and hence cannot be altered.
</li>
<li>
While some job parameters are straight-forward to set
(e.g. {@link #setNumReduceTasks(int)}), some parameters interact subtly
with the rest of the framework and/or job-configuration and are relatively
more complex for the user to control finely (e.g. {@link #setNumMapTasks(int)}).
</li>
</ol></p>
<p><code>JobConf</code> typically specifies the {@link Mapper}, combiner
(if any), {@link Partitioner}, {@link Reducer}, {@link InputFormat} and
{@link OutputFormat} implementations to be used, etc.</p>
<p>Optionally <code>JobConf</code> is used to specify other advanced facets
of the job such as <code>Comparator</code>s to be used, files to be put in
the {@link DistributedCache}, whether or not intermediate and/or job outputs
are to be compressed (and how), debugability via user-provided scripts
( {@link #setMapDebugScript(String)}/{@link #setReduceDebugScript(String)}),
for doing post-processing on task logs, task's stdout, stderr, syslog,
etc.</p>
<p>Here is an example on how to configure a job via <code>JobConf</code>:</p>
<p><blockquote><pre>
// Create a new JobConf
JobConf job = new JobConf(new Configuration(), MyJob.class);
// Specify various job-specific parameters
job.setJobName("myjob");
FileInputFormat.setInputPaths(job, new Path("in"));
FileOutputFormat.setOutputPath(job, new Path("out"));
job.setMapperClass(MyJob.MyMapper.class);
job.setCombinerClass(MyJob.MyReducer.class);
job.setReducerClass(MyJob.MyReducer.class);
job.setInputFormat(SequenceFileInputFormat.class);
job.setOutputFormat(SequenceFileOutputFormat.class);
</pre></blockquote></p>
@see JobClient
@see ClusterStatus
@see Tool
@see DistributedCache
@deprecated Use {@link Configuration} instead]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobConf -->
<!-- start interface org.apache.hadoop.mapred.JobConfigurable -->
<interface name="JobConfigurable" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Initializes a new instance from a {@link JobConf}.
@param job the configuration]]>
</doc>
</method>
<doc>
<![CDATA[Something that may be configured.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.JobConfigurable -->
<!-- start class org.apache.hadoop.mapred.JobContext -->
<class name="JobContext" extends="org.apache.hadoop.mapreduce.JobContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.JobContext} instead.">
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the job Configuration
@return JobConf]]>
</doc>
</method>
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the progress mechanism for reporting progress.
@return progress mechanism]]>
</doc>
</method>
<doc>
<![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.JobContext} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobContext -->
<!-- start class org.apache.hadoop.mapred.JobEndNotifier -->
<class name="JobEndNotifier" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobEndNotifier"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="startNotifier"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="stopNotifier"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="registerNotification"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="status" type="org.apache.hadoop.mapred.JobStatus"/>
</method>
<method name="localRunnerNotification"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="status" type="org.apache.hadoop.mapred.JobStatus"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.JobEndNotifier -->
<!-- start class org.apache.hadoop.mapred.JobHistory -->
<class name="JobHistory" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobHistory"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="init" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="hostname" type="java.lang.String"/>
<param name="jobTrackerStartTime" type="long"/>
<doc>
<![CDATA[Initialize JobHistory files.
@param conf Jobconf of the job tracker.
@param hostname jobtracker's hostname
@param jobTrackerStartTime jobtracker's start time
@return true if initialized properly
false otherwise]]>
</doc>
</method>
<method name="parseHistoryFromFS"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="path" type="java.lang.String"/>
<param name="l" type="org.apache.hadoop.mapred.JobHistory.Listener"/>
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Parses history file and invokes Listener.handle() for
each line of history. It can be used for looking through history
files for specific items without having to keep whole history in memory.
@param path path to history file
@param l Listener for history events
@param fs FileSystem where history file is present
@throws IOException]]>
</doc>
</method>
<method name="isDisableHistory" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns history disable status. By default history is enabled so this
method returns false.
@return true if history logging is disabled, false otherwise.]]>
</doc>
</method>
<method name="setDisableHistory"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="disableHistory" type="boolean"/>
<doc>
<![CDATA[Enable/disable history logging. Default value is false, so history
is enabled by default.
@param disableHistory true if history should be disabled, false otherwise.]]>
</doc>
</method>
<method name="getTaskLogsUrl" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="attempt" type="org.apache.hadoop.mapred.JobHistory.TaskAttempt"/>
<doc>
<![CDATA[Return the TaskLogsUrl of a particular TaskAttempt
@param attempt
@return the taskLogsUrl. null if http-port or tracker-name or
task-attempt-id are unavailable.]]>
</doc>
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOB_NAME_TRIM_LENGTH" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Provides methods for writing to and reading from job history.
Job History works in an append mode, JobHistory and its inner classes provide methods
to log job events.
JobHistory is split into multiple files, format of each file is plain text where each line
is of the format [type (key=value)*], where type identifies the type of the record.
Type maps to UID of one of the inner classes of this class.
Job history is maintained in a master index which contains start/stop times of all jobs with
a few other job level properties. Apart from this, each job's history is maintained in a separate history
file. The name of each job history file follows the format jobtrackerId_jobid.
For parsing the job history it supports a listener based interface where each line is parsed
and passed to listener. The listener can create an object model of history or look for specific
events and discard rest of the history.
CHANGE LOG :
Version 0 : The history has the following format :
TAG KEY1="VALUE1" KEY2="VALUE2" and so on.
TAG can be Job, Task, MapAttempt or ReduceAttempt.
Note that a '"' is the line delimiter.
Version 1 : Changes the line delimiter to '.'
Values are now escaped for unambiguous parsing.
Added the Meta tag to store version info.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory -->
<!-- start class org.apache.hadoop.mapred.JobHistory.HistoryCleaner -->
<class name="JobHistory.HistoryCleaner" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.Runnable"/>
<constructor name="JobHistory.HistoryCleaner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Cleans up history data.]]>
</doc>
</method>
<doc>
<![CDATA[Delete history files older than one month. Update master index and remove all
jobs older than one month. Also if a job tracker has no jobs in last one month
remove reference to the job tracker.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.HistoryCleaner -->
<!-- start class org.apache.hadoop.mapred.JobHistory.JobInfo -->
<class name="JobHistory.JobInfo" extends="org.apache.hadoop.mapred.JobHistory.KeyValuePair"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobHistory.JobInfo" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create new JobInfo]]>
</doc>
</constructor>
<method name="getAllTasks" return="java.util.Map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns all map and reduce tasks <taskid-Task>.]]>
</doc>
</method>
<method name="getLocalJobFilePath" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<doc>
<![CDATA[Get the path of the locally stored job file
@param jobId id of the job
@return the path of the job file on the local file system]]>
</doc>
</method>
<method name="encodeJobHistoryFilePath" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="logFile" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Helper function to encode the URL of the path of the job-history
log file.
@param logFile path of the job-history file
@return URL encoded path
@throws IOException]]>
</doc>
</method>
<method name="encodeJobHistoryFileName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="logFileName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Helper function to encode the URL of the filename of the job-history
log file.
@param logFileName file name of the job-history file
@return URL encoded filename
@throws IOException]]>
</doc>
</method>
<method name="decodeJobHistoryFileName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="logFileName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Helper function to decode the URL of the filename of the job-history
log file.
@param logFileName file name of the job-history file
@return URL decoded filename
@throws IOException]]>
</doc>
</method>
<method name="getUserName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the user name from the job conf]]>
</doc>
</method>
<method name="getJobHistoryLogLocation" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="logFileName" type="java.lang.String"/>
<doc>
<![CDATA[Get the job history file path given the history filename]]>
</doc>
</method>
<method name="getJobHistoryLogLocationForUser" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="logFileName" type="java.lang.String"/>
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the user job history file path]]>
</doc>
</method>
<method name="getJobHistoryFileName" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="id" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Recover the job history filename from the history folder.
Uses the following pattern
$jt-hostname_[0-9]*_$job-id_$user-$job-name*
@param jobConf the job conf
@param id job id]]>
</doc>
</method>
<method name="recoverJobHistoryFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="logFilePath" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Selects one of the two files generated as a part of recovery.
The rule of thumb is to always select the oldest file.
This call makes sure that only one file is left in the end.
@param conf job conf
@param logFilePath Path of the log file
@throws IOException]]>
</doc>
</method>
<method name="logSubmitted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="jobConfPath" type="java.lang.String"/>
<param name="submitTime" type="long"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Log job submitted event to history. Creates a new file in history
for the job. If history file creation fails, it disables history
for all other events.
@param jobId job id assigned by job tracker.
@param jobConf job conf of the job
@param jobConfPath path to job conf xml file in HDFS.
@param submitTime time when job tracker received the job
@throws IOException]]>
</doc>
</method>
<method name="logInited"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="startTime" type="long"/>
<param name="totalMaps" type="int"/>
<param name="totalReduces" type="int"/>
<doc>
<![CDATA[Logs launch time of job.
@param jobId job id, assigned by jobtracker.
@param startTime start time of job.
@param totalMaps total maps assigned by jobtracker.
@param totalReduces total reduces.]]>
</doc>
</method>
<method name="logStarted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use {@link #logInited(JobID, long, int, int)} and
{@link #logStarted(JobID)}">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="startTime" type="long"/>
<param name="totalMaps" type="int"/>
<param name="totalReduces" type="int"/>
<doc>
<![CDATA[Logs the job as RUNNING.
@param jobId job id, assigned by jobtracker.
@param startTime start time of job.
@param totalMaps total maps assigned by jobtracker.
@param totalReduces total reduces.
@deprecated Use {@link #logInited(JobID, long, int, int)} and
{@link #logStarted(JobID)}]]>
</doc>
</method>
<method name="logStarted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<doc>
<![CDATA[Logs job as running
@param jobId job id, assigned by jobtracker.]]>
</doc>
</method>
<method name="logFinished"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="finishTime" type="long"/>
<param name="finishedMaps" type="int"/>
<param name="finishedReduces" type="int"/>
<param name="failedMaps" type="int"/>
<param name="failedReduces" type="int"/>
<param name="counters" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Log job finished. Closes the job file in history.
@param jobId job id, assigned by jobtracker.
@param finishTime finish time of job in ms.
@param finishedMaps no of maps successfully finished.
@param finishedReduces no of reduces finished successfully.
@param failedMaps no of failed map tasks.
@param failedReduces no of failed reduce tasks.
@param counters the counters from the job]]>
</doc>
</method>
<method name="logFailed"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<param name="timestamp" type="long"/>
<param name="finishedMaps" type="int"/>
<param name="finishedReduces" type="int"/>
<doc>
<![CDATA[Logs job failed event. Closes the job history log file.
@param jobid job id
@param timestamp time when job failure was detected in ms.
@param finishedMaps no of finished map tasks.
@param finishedReduces no of finished reduce tasks.]]>
</doc>
</method>
<method name="logKilled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<param name="timestamp" type="long"/>
<param name="finishedMaps" type="int"/>
<param name="finishedReduces" type="int"/>
<doc>
<![CDATA[Logs job killed event. Closes the job history log file.
@param jobid
job id
@param timestamp
time when job kill was issued in ms.
@param finishedMaps
no of finished map tasks.
@param finishedReduces
no of finished reduce tasks.]]>
</doc>
</method>
<method name="logJobPriority"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<param name="priority" type="org.apache.hadoop.mapred.JobPriority"/>
<doc>
<![CDATA[Log job's priority.
@param jobid job id
@param priority Job's priority]]>
</doc>
</method>
<method name="logJobInfo"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use {@link #logJobInfo(JobID, long, long)} instead.">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<param name="submitTime" type="long"/>
<param name="launchTime" type="long"/>
<param name="restartCount" type="int"/>
<doc>
<![CDATA[Log job's submit-time/launch-time
@param jobid job id
@param submitTime job's submit time
@param launchTime job's launch time
@param restartCount number of times the job got restarted
@deprecated Use {@link #logJobInfo(JobID, long, long)} instead.]]>
</doc>
</method>
<method name="logJobInfo"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<param name="submitTime" type="long"/>
<param name="launchTime" type="long"/>
</method>
<doc>
<![CDATA[Helper class for logging or reading back events related to job start, finish or failure.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.JobInfo -->
<!-- start class org.apache.hadoop.mapred.JobHistory.Keys -->
<class name="JobHistory.Keys" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobHistory.Keys[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.Keys"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="JOBTRACKERID" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="START_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FINISH_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOBID" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOBNAME" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="USER" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOBCONF" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SUBMIT_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LAUNCH_TIME" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TOTAL_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TOTAL_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FINISHED_MAPS" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FINISHED_REDUCES" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOB_STATUS" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TASKID" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="HOSTNAME" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TASK_TYPE" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="ERROR" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TASK_ATTEMPT_ID" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TASK_STATUS" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="COPY_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SORT_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="REDUCE_PHASE" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SHUFFLE_FINISHED" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SORT_FINISHED" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="COUNTERS" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SPLITS" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="JOB_PRIORITY" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="HTTP_PORT" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TRACKER_NAME" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="STATE_STRING" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="VERSION" type="org.apache.hadoop.mapred.JobHistory.Keys"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Job history files contain key="value" pairs, where keys belong to this enum.
It acts as a global namespace for all keys.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.Keys -->
<!-- start interface org.apache.hadoop.mapred.JobHistory.Listener -->
<interface name="JobHistory.Listener" abstract="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="handle"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="recType" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"/>
<param name="values" type="java.util.Map"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Callback method for history parser.
@param recType type of record, which is the first entry in the line.
@param values a map of key-value pairs as they appear in history.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[Callback interface for reading back log events from JobHistory. This interface
should be implemented and passed to JobHistory.parseHistory()]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.JobHistory.Listener -->
<!-- start class org.apache.hadoop.mapred.JobHistory.MapAttempt -->
<class name="JobHistory.MapAttempt" extends="org.apache.hadoop.mapred.JobHistory.TaskAttempt"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobHistory.MapAttempt"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="logStarted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logStarted(TaskAttemptID, long, String, int, String)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="startTime" type="long"/>
<param name="hostName" type="java.lang.String"/>
<doc>
<![CDATA[Log start time of this map task attempt.
@param taskAttemptId task attempt id
@param startTime start time of task attempt as reported by task tracker.
@param hostName host name of the task attempt.
@deprecated Use
{@link #logStarted(TaskAttemptID, long, String, int, String)}]]>
</doc>
</method>
<method name="logStarted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="startTime" type="long"/>
<param name="trackerName" type="java.lang.String"/>
<param name="httpPort" type="int"/>
<param name="taskType" type="java.lang.String"/>
<doc>
<![CDATA[Log start time of this map task attempt.
@param taskAttemptId task attempt id
@param startTime start time of task attempt as reported by task tracker.
@param trackerName name of the tracker executing the task attempt.
@param httpPort http port of the task tracker executing the task attempt
@param taskType Whether the attempt is cleanup or setup or map]]>
</doc>
</method>
<method name="logFinished"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logFinished(TaskAttemptID, long, String, String, String, Counters)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="finishTime" type="long"/>
<param name="hostName" type="java.lang.String"/>
<doc>
<![CDATA[Log finish time of map task attempt.
@param taskAttemptId task attempt id
@param finishTime finish time
@param hostName host name
@deprecated Use
{@link #logFinished(TaskAttemptID, long, String, String, String, Counters)}]]>
</doc>
</method>
<method name="logFinished"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="finishTime" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="taskType" type="java.lang.String"/>
<param name="stateString" type="java.lang.String"/>
<param name="counter" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Log finish time of map task attempt.
@param taskAttemptId task attempt id
@param finishTime finish time
@param hostName host name
@param taskType Whether the attempt is cleanup or setup or map
@param stateString state string of the task attempt
@param counter counters of the task attempt]]>
</doc>
</method>
<method name="logFailed"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logFailed(TaskAttemptID, long, String, String, String)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<doc>
<![CDATA[Log task attempt failed event.
@param taskAttemptId task attempt id
@param timestamp timestamp
@param hostName hostname of this task attempt.
@param error error message if any for this task attempt.
@deprecated Use
{@link #logFailed(TaskAttemptID, long, String, String, String)}]]>
</doc>
</method>
<method name="logFailed"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<param name="taskType" type="java.lang.String"/>
<doc>
<![CDATA[Log task attempt failed event.
@param taskAttemptId task attempt id
@param timestamp timestamp
@param hostName hostname of this task attempt.
@param error error message if any for this task attempt.
@param taskType Whether the attempt is cleanup or setup or map]]>
</doc>
</method>
<method name="logKilled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logKilled(TaskAttemptID, long, String, String, String)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<doc>
<![CDATA[Log task attempt killed event.
@param taskAttemptId task attempt id
@param timestamp timestamp
@param hostName hostname of this task attempt.
@param error error message if any for this task attempt.
@deprecated Use
{@link #logKilled(TaskAttemptID, long, String, String, String)}]]>
</doc>
</method>
<method name="logKilled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<param name="taskType" type="java.lang.String"/>
<doc>
<![CDATA[Log task attempt killed event.
@param taskAttemptId task attempt id
@param timestamp timestamp
@param hostName hostname of this task attempt.
@param error error message if any for this task attempt.
@param taskType Whether the attempt is cleanup or setup or map]]>
</doc>
</method>
<doc>
<![CDATA[Helper class for logging or reading back events related to start, finish or failure of
a Map Attempt on a node.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.MapAttempt -->
<!-- start class org.apache.hadoop.mapred.JobHistory.RecordTypes -->
<class name="JobHistory.RecordTypes" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobHistory.RecordTypes[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.RecordTypes"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="Jobtracker" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="Job" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="Task" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MapAttempt" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="ReduceAttempt" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="Meta" type="org.apache.hadoop.mapred.JobHistory.RecordTypes"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Record types are identifiers for each line of log in history files.
A record type appears as the first token in a single line of log.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.RecordTypes -->
<!-- start class org.apache.hadoop.mapred.JobHistory.ReduceAttempt -->
<class name="JobHistory.ReduceAttempt" extends="org.apache.hadoop.mapred.JobHistory.TaskAttempt"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobHistory.ReduceAttempt"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="logStarted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logStarted(TaskAttemptID, long, String, int, String)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="startTime" type="long"/>
<param name="hostName" type="java.lang.String"/>
<doc>
<![CDATA[Log start time of Reduce task attempt.
@param taskAttemptId task attempt id
@param startTime start time
@param hostName host name
@deprecated Use
{@link #logStarted(TaskAttemptID, long, String, int, String)}]]>
</doc>
</method>
<method name="logStarted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="startTime" type="long"/>
<param name="trackerName" type="java.lang.String"/>
<param name="httpPort" type="int"/>
<param name="taskType" type="java.lang.String"/>
<doc>
<![CDATA[Log start time of Reduce task attempt.
@param taskAttemptId task attempt id
@param startTime start time
@param trackerName tracker name
@param httpPort the http port of the tracker executing the task attempt
@param taskType Whether the attempt is cleanup or setup or reduce]]>
</doc>
</method>
<method name="logFinished"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logFinished(TaskAttemptID, long, long, long, String, String, String, Counters)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="shuffleFinished" type="long"/>
<param name="sortFinished" type="long"/>
<param name="finishTime" type="long"/>
<param name="hostName" type="java.lang.String"/>
<doc>
<![CDATA[Log finished event of this task.
@param taskAttemptId task attempt id
@param shuffleFinished shuffle finish time
@param sortFinished sort finish time
@param finishTime finish time of task
@param hostName host name where task attempt executed
@deprecated Use
{@link #logFinished(TaskAttemptID, long, long, long, String, String, String, Counters)}]]>
</doc>
</method>
<method name="logFinished"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="shuffleFinished" type="long"/>
<param name="sortFinished" type="long"/>
<param name="finishTime" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="taskType" type="java.lang.String"/>
<param name="stateString" type="java.lang.String"/>
<param name="counter" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Log finished event of this task.
@param taskAttemptId task attempt id
@param shuffleFinished shuffle finish time
@param sortFinished sort finish time
@param finishTime finish time of task
@param hostName host name where task attempt executed
@param taskType Whether the attempt is cleanup or setup or reduce
@param stateString the state string of the attempt
@param counter counters of the attempt]]>
</doc>
</method>
<method name="logFailed"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logFailed(TaskAttemptID, long, String, String, String)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<doc>
<![CDATA[Log failed reduce task attempt.
@param taskAttemptId task attempt id
@param timestamp time stamp when task failed
@param hostName host name of the task attempt.
@param error error message of the task.
@deprecated Use
{@link #logFailed(TaskAttemptID, long, String, String, String)}]]>
</doc>
</method>
<method name="logFailed"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<param name="taskType" type="java.lang.String"/>
<doc>
<![CDATA[Log failed reduce task attempt.
@param taskAttemptId task attempt id
@param timestamp time stamp when task failed
@param hostName host name of the task attempt.
@param error error message of the task.
@param taskType Whether the attempt is cleanup or setup or reduce]]>
</doc>
</method>
<method name="logKilled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use
{@link #logKilled(TaskAttemptID, long, String, String, String)}">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<doc>
<![CDATA[Log killed reduce task attempt.
@param taskAttemptId task attempt id
@param timestamp time stamp when task was killed
@param hostName host name of the task attempt.
@param error error message of the task.
@deprecated Use
{@link #logKilled(TaskAttemptID, long, String, String, String)}]]>
</doc>
</method>
<method name="logKilled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskAttemptId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="timestamp" type="long"/>
<param name="hostName" type="java.lang.String"/>
<param name="error" type="java.lang.String"/>
<param name="taskType" type="java.lang.String"/>
<doc>
<![CDATA[Log killed reduce task attempt.
@param taskAttemptId task attempt id
@param timestamp time stamp when task was killed
@param hostName host name of the task attempt.
@param error error message of the task.
@param taskType Whether the attempt is cleanup or setup or reduce]]>
</doc>
</method>
<doc>
<![CDATA[Helper class for logging or reading back events related to start, finish or failure of
a Reduce Attempt on a node.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.ReduceAttempt -->
<!-- start class org.apache.hadoop.mapred.JobHistory.Task -->
<class name="JobHistory.Task" extends="org.apache.hadoop.mapred.JobHistory.KeyValuePair"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobHistory.Task"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="logStarted"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
<param name="taskType" type="java.lang.String"/>
<param name="startTime" type="long"/>
<param name="splitLocations" type="java.lang.String"/>
<doc>
<![CDATA[Log start time of task (TIP).
@param taskId task id
@param taskType MAP or REDUCE
@param startTime startTime of tip.]]>
</doc>
</method>
<method name="logFinished"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
<param name="taskType" type="java.lang.String"/>
<param name="finishTime" type="long"/>
<param name="counters" type="org.apache.hadoop.mapred.Counters"/>
<doc>
<![CDATA[Log finish time of task.
@param taskId task id
@param taskType MAP or REDUCE
@param finishTime finish time of task in ms]]>
</doc>
</method>
<method name="logUpdates"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
<param name="finishTime" type="long"/>
<doc>
<![CDATA[Update the finish time of task.
@param taskId task id
@param finishTime finish time of task in ms]]>
</doc>
</method>
<method name="logFailed"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
<param name="taskType" type="java.lang.String"/>
<param name="time" type="long"/>
<param name="error" type="java.lang.String"/>
<doc>
<![CDATA[Log task failed event.
@param taskId task id
@param taskType MAP or REDUCE.
@param time timestamp when task failure was detected.
@param error error message for failure.]]>
</doc>
</method>
<method name="logFailed"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskID"/>
<param name="taskType" type="java.lang.String"/>
<param name="time" type="long"/>
<param name="error" type="java.lang.String"/>
<param name="failedDueToAttempt" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[@param failedDueToAttempt The attempt that caused the failure, if any]]>
</doc>
</method>
<method name="getTaskAttempts" return="java.util.Map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns all task attempts for this task. <task attempt id - TaskAttempt>]]>
</doc>
</method>
<doc>
<![CDATA[Helper class for logging or reading back events related to Task's start, finish or failure.
All events logged by this class are logged in a separate file per job in
job tracker history. These events map to TIPs in jobtracker.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.Task -->
<!-- start class org.apache.hadoop.mapred.JobHistory.TaskAttempt -->
<class name="JobHistory.TaskAttempt" extends="org.apache.hadoop.mapred.JobHistory.Task"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobHistory.TaskAttempt"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Base class for Map and Reduce TaskAttempts.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.TaskAttempt -->
<!-- start class org.apache.hadoop.mapred.JobHistory.Values -->
<class name="JobHistory.Values" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobHistory.Values[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.JobHistory.Values"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="SUCCESS" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="KILLED" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MAP" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="REDUCE" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="CLEANUP" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RUNNING" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="PREP" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SETUP" type="org.apache.hadoop.mapred.JobHistory.Values"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This enum contains some of the values commonly used by history log events.
Since values in history can only be strings - Values.name() is used in
most places in history file.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobHistory.Values -->
<!-- start class org.apache.hadoop.mapred.JobID -->
<class name="JobID" extends="org.apache.hadoop.mapreduce.JobID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobID" type="java.lang.String, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a JobID object
@param jtIdentifier jobTracker identifier
@param id job number]]>
</doc>
</constructor>
<constructor name="JobID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="downgrade" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="old" type="org.apache.hadoop.mapreduce.JobID"/>
<doc>
<![CDATA[Downgrade a new JobID to an old one
@param old a new or old JobID
@return either old or a new JobID built to match old]]>
</doc>
</method>
<method name="read" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a JobID object from the given string
@param str string representation of a JobID
@return constructed JobID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<method name="getJobIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches job IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>any job</i>
run on the jobtracker started at <i>200707121733</i>, we would use :
<pre>
JobID.getJobIDsPattern("200707121733", null);
</pre>
which will return :
<pre> "job_200707121733_[0-9]*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@return a regex pattern matching JobIDs]]>
</doc>
</method>
<doc>
<![CDATA[JobID represents the immutable and unique identifier for
the job. JobID consists of two parts. First part
represents the jobtracker identifier, so that jobID to jobtracker map
is defined. For cluster setup this string is the jobtracker
start time, for local setting, it is "local".
Second part of the JobID is the job number. <br>
An example JobID is :
<code>job_200707121733_0003</code> , which represents the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse JobID strings, but rather
use appropriate constructors or {@link #forName(String)} method.
@see TaskID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobID -->
<!-- start class org.apache.hadoop.mapred.JobPriority -->
<class name="JobPriority" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobPriority[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.JobPriority"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="VERY_HIGH" type="org.apache.hadoop.mapred.JobPriority"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="HIGH" type="org.apache.hadoop.mapred.JobPriority"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="NORMAL" type="org.apache.hadoop.mapred.JobPriority"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LOW" type="org.apache.hadoop.mapred.JobPriority"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="VERY_LOW" type="org.apache.hadoop.mapred.JobPriority"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Used to describe the priority of the running job.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobPriority -->
<!-- start class org.apache.hadoop.mapred.JobProfile -->
<class name="JobProfile" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="JobProfile"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct an empty {@link JobProfile}.]]>
</doc>
</constructor>
<constructor name="JobProfile" type="java.lang.String, org.apache.hadoop.mapreduce.JobID, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a {@link JobProfile} with the userid, jobid,
job config-file, job-details url and job name.
@param user userid of the person who submitted the job.
@param jobid id of the job.
@param jobFile job configuration file.
@param url link to the web-ui for details of the job.
@param name user-specified job name.]]>
</doc>
</constructor>
<constructor name="JobProfile" type="java.lang.String, org.apache.hadoop.mapreduce.JobID, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a {@link JobProfile} with the userid, jobid,
job config-file, job-details url, job name and queue name.
@param user userid of the person who submitted the job.
@param jobid id of the job.
@param jobFile job configuration file.
@param url link to the web-ui for details of the job.
@param name user-specified job name.
@param queueName name of the queue to which the job is submitted]]>
</doc>
</constructor>
<constructor name="JobProfile" type="java.lang.String, java.lang.String, java.lang.String, java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="use JobProfile(String, JobID, String, String, String) instead">
<doc>
<![CDATA[@deprecated use JobProfile(String, JobID, String, String, String) instead]]>
</doc>
</constructor>
<method name="getUser" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user id.]]>
</doc>
</method>
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the job id.]]>
</doc>
</method>
<method name="getJobId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use getJobID() instead">
<doc>
<![CDATA[@deprecated use getJobID() instead]]>
</doc>
</method>
<method name="getJobFile" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configuration file for the job.]]>
</doc>
</method>
<method name="getURL" return="java.net.URL"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the link to the web-ui for details of the job.]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified job name.]]>
</doc>
</method>
<method name="getQueueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the name of the queue to which the job is submitted.
@return name of the queue.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A JobProfile is a MapReduce primitive. Tracks a job,
whether living or dead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobProfile -->
<!-- start class org.apache.hadoop.mapred.JobQueueInfo -->
<class name="JobQueueInfo" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="JobQueueInfo"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for Job Queue Info.]]>
</doc>
</constructor>
<constructor name="JobQueueInfo" type="java.lang.String, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a new JobQueueInfo object using the queue name and the
scheduling information passed.
@param queueName Name of the job queue
@param schedulingInfo Scheduling Information associated with the job
queue]]>
</doc>
</constructor>
<method name="setQueueName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queueName" type="java.lang.String"/>
<doc>
<![CDATA[Set the queue name of the JobQueueInfo
@param queueName Name of the job queue.]]>
</doc>
</method>
<method name="getQueueName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the queue name from JobQueueInfo
@return queue name]]>
</doc>
</method>
<method name="setSchedulingInfo"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="schedulingInfo" type="java.lang.String"/>
<doc>
<![CDATA[Set the scheduling information associated with a particular job queue
@param schedulingInfo Scheduling information associated with the job queue]]>
</doc>
</method>
<method name="getSchedulingInfo" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the scheduling information associated with a particular job queue.
If nothing is set, returns <b>"N/A"</b>
@return Scheduling information associated to particular Job Queue]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Class that contains the information regarding the Job Queues which are
maintained by the Hadoop Map/Reduce framework.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobQueueInfo -->
<!-- start class org.apache.hadoop.mapred.JobStatus -->
<class name="JobStatus" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Cloneable"/>
<constructor name="JobStatus"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on cleanup
@param runState The current state of the job]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param runState The current state of the job]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, int, org.apache.hadoop.mapred.JobPriority"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on cleanup
@param runState The current state of the job
@param jp Priority of the job.]]>
</doc>
</constructor>
<constructor name="JobStatus" type="org.apache.hadoop.mapred.JobID, float, float, float, float, int, org.apache.hadoop.mapred.JobPriority"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a job status object for a given jobid.
@param jobid The jobid of the job
@param setupProgress The progress made on the setup
@param mapProgress The progress made on the maps
@param reduceProgress The progress made on the reduces
@param cleanupProgress The progress made on the cleanup
@param runState The current state of the job
@param jp Priority of the job.]]>
</doc>
</constructor>
<method name="getJobId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use getJobID instead">
<doc>
<![CDATA[@deprecated use getJobID instead]]>
</doc>
</method>
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return The jobid of the Job]]>
</doc>
</method>
<method name="mapProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in maps]]>
</doc>
</method>
<method name="cleanupProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in cleanup]]>
</doc>
</method>
<method name="setupProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in setup]]>
</doc>
</method>
<method name="reduceProgress" return="float"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return Percentage of progress in reduce]]>
</doc>
</method>
<method name="getRunState" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return running state of the job]]>
</doc>
</method>
<method name="setRunState"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="state" type="int"/>
<doc>
<![CDATA[Change the current run state of the job.]]>
</doc>
</method>
<method name="getStartTime" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return start time of the job]]>
</doc>
</method>
<method name="clone" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getUsername" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the username of the job]]>
</doc>
</method>
<method name="getSchedulingInfo" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the scheduling information associated with a particular Job.
@return the scheduling information of the job]]>
</doc>
</method>
<method name="setSchedulingInfo"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="schedulingInfo" type="java.lang.String"/>
<doc>
<![CDATA[Used to set the scheduling information associated with a particular Job.
@param schedulingInfo Scheduling information of the job]]>
</doc>
</method>
<method name="getJobPriority" return="org.apache.hadoop.mapred.JobPriority"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the priority of the job
@return job priority]]>
</doc>
</method>
<method name="setJobPriority"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jp" type="org.apache.hadoop.mapred.JobPriority"/>
<doc>
<![CDATA[Set the priority of the job, defaulting to NORMAL.
@param jp new job priority]]>
</doc>
</method>
<method name="isJobComplete" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns true if the status is for a completed job.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="RUNNING" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SUCCEEDED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="PREP" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="KILLED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Describes the current status of a job. This is
not intended to be a comprehensive piece of data.
For that, look at JobProfile.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobStatus -->
<!-- start class org.apache.hadoop.mapred.JobTracker -->
<class name="JobTracker" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.MRConstants"/>
<implements name="org.apache.hadoop.mapred.InterTrackerProtocol"/>
<implements name="org.apache.hadoop.mapred.JobSubmissionProtocol"/>
<implements name="org.apache.hadoop.mapred.TaskTrackerManager"/>
<implements name="org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol"/>
<method name="startTracker" return="org.apache.hadoop.mapred.JobTracker"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Start the JobTracker with given configuration.
The conf will be modified to reflect the actual ports on which
the JobTracker is up and running if the user passes the port as
<code>zero</code>.
@param conf configuration for the JobTracker.
@throws IOException]]>
</doc>
</method>
<method name="stopTracker"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProtocolVersion" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="protocol" type="java.lang.String"/>
<param name="clientVersion" type="long"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="hasRestarted" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Whether the JT has restarted]]>
</doc>
</method>
<method name="hasRecovered" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Whether the JT has recovered upon restart]]>
</doc>
</method>
<method name="getRecoveryDuration" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[How long the jobtracker took to recover from restart.]]>
</doc>
</method>
<method name="getInstrumentationClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="setInstrumentationClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="t" type="java.lang.Class"/>
</method>
<method name="getAddress" return="java.net.InetSocketAddress"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="offerService"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Run forever]]>
</doc>
</method>
<method name="getTotalSubmissions" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobTrackerMachine" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getTrackerIdentifier" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the unique identifier (i.e. timestamp) of this job tracker start.
@return a string with a unique identifier]]>
</doc>
</method>
<method name="getTrackerPort" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getInfoPort" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getStartTime" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="runningJobs" return="java.util.Vector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getRunningJobs" return="java.util.List"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Version that is called from a timer thread, and therefore needs to be
careful to synchronize.]]>
</doc>
</method>
<method name="failedJobs" return="java.util.Vector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="completedJobs" return="java.util.Vector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="taskTrackers" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get all the task trackers in the cluster
@return {@link Collection} of {@link TaskTrackerStatus}]]>
</doc>
</method>
<method name="activeTaskTrackers" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the active task tracker statuses in the cluster
@return {@link Collection} of active {@link TaskTrackerStatus}]]>
</doc>
</method>
<method name="taskTrackerNames" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the active and blacklisted task tracker names in the cluster. The first
element in the returned list contains the list of active tracker names.
The second element in the returned list contains the list of blacklisted
tracker names.]]>
</doc>
</method>
<method name="blacklistedTaskTrackers" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the blacklisted task tracker statuses in the cluster
@return {@link Collection} of blacklisted {@link TaskTrackerStatus}]]>
</doc>
</method>
<method name="isBlacklisted" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="trackerID" type="java.lang.String"/>
<doc>
<![CDATA[Whether the tracker is blacklisted or not
@param trackerID
@return true if blacklisted, false otherwise]]>
</doc>
</method>
<method name="getTaskTracker" return="org.apache.hadoop.mapred.TaskTrackerStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="trackerID" type="java.lang.String"/>
</method>
<method name="resolveAndAddToTopology" return="org.apache.hadoop.net.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<method name="getNodesAtMaxLevel" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns a collection of nodes at the max level]]>
</doc>
</method>
<method name="getParentNode" return="org.apache.hadoop.net.Node"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="node" type="org.apache.hadoop.net.Node"/>
<param name="level" type="int"/>
</method>
<method name="getNode" return="org.apache.hadoop.net.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Return the Node in the network topology that corresponds to the hostname]]>
</doc>
</method>
<method name="getNumTaskCacheLevels" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getNumResolvedTaskTrackers" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getNumberOfUniqueHosts" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="addJobInProgressListener"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="listener" type="org.apache.hadoop.mapred.JobInProgressListener"/>
</method>
<method name="removeJobInProgressListener"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="listener" type="org.apache.hadoop.mapred.JobInProgressListener"/>
</method>
<method name="getQueueManager" return="org.apache.hadoop.mapred.QueueManager"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the {@link QueueManager} associated with the JobTracker.]]>
</doc>
</method>
<method name="getBuildVersion" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="heartbeat" return="org.apache.hadoop.mapred.HeartbeatResponse"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="org.apache.hadoop.mapred.TaskTrackerStatus"/>
<param name="restarted" type="boolean"/>
<param name="initialContact" type="boolean"/>
<param name="acceptNewTasks" type="boolean"/>
<param name="responseId" type="short"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The periodic heartbeat mechanism between the {@link TaskTracker} and
the {@link JobTracker}.
The {@link JobTracker} processes the status information sent by the
{@link TaskTracker} and responds with instructions to start/stop
tasks or jobs, and also 'reset' instructions during contingencies.]]>
</doc>
</method>
<method name="getNextHeartbeatInterval" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Calculates next heartbeat interval using cluster size.
Heartbeat interval is incremented by 1 second for every 50 nodes.
@return next heartbeat interval.]]>
</doc>
</method>
<method name="getFilesystemName" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Grab the local fs name]]>
</doc>
</method>
<method name="reportTaskTrackerError"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskTracker" type="java.lang.String"/>
<param name="errorClass" type="java.lang.String"/>
<param name="errorMessage" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getNewJobId" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Allocates a new JobId string.]]>
</doc>
</method>
<method name="submitJob" return="org.apache.hadoop.mapred.JobStatus"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[JobTracker.submitJob() kicks off a new job.
Create a 'JobInProgress' object, which contains both JobProfile
and JobStatus. Those two sub-objects are sometimes shipped outside
of the JobTracker. But JobInProgress adds info that's useful for
the JobTracker alone.]]>
</doc>
</method>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="use {@link #getClusterStatus(boolean)}">
<doc>
<![CDATA[@deprecated use {@link #getClusterStatus(boolean)}]]>
</doc>
</method>
<method name="getClusterStatus" return="org.apache.hadoop.mapred.ClusterStatus"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="detailed" type="boolean"/>
</method>
<method name="killJob"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setJobPriority"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<param name="priority" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the priority of a job
@param jobid id of the job
@param priority new priority of the job]]>
</doc>
</method>
<method name="getJobProfile" return="org.apache.hadoop.mapred.JobProfile"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getJobStatus" return="org.apache.hadoop.mapred.JobStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getJobCounters" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getMapTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getReduceTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getCleanupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getSetupTaskReports" return="org.apache.hadoop.mapred.TaskReport[]"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
<param name="fromEventId" type="int"/>
<param name="maxEvents" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getTaskDiagnostics" return="java.lang.String[]"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the diagnostics for a given task
@param taskId the id of the task
@return an array of the diagnostic messages]]>
</doc>
</method>
<method name="getTip" return="org.apache.hadoop.mapred.TaskInProgress"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="tipid" type="org.apache.hadoop.mapred.TaskID"/>
<doc>
<![CDATA[Returns specified TaskInProgress, or null.]]>
</doc>
</method>
<method name="killTask" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="shouldFail" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Mark a Task to be killed]]>
</doc>
</method>
<method name="getAssignedTracker" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[Get tracker name for a given task id.
@param taskId the name of the task
@return The name of the task tracker]]>
</doc>
</method>
<method name="jobsToComplete" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getAllJobs" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getSystemDir" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@see org.apache.hadoop.mapred.JobSubmissionProtocol#getSystemDir()]]>
</doc>
</method>
<method name="getJob" return="org.apache.hadoop.mapred.JobInProgress"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobid" type="org.apache.hadoop.mapred.JobID"/>
</method>
<method name="getLocalJobFilePath" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<doc>
<![CDATA[Get the localized job file path on the job tracker's local file system
@param jobId id of the job
@return the path of the job conf file on the local file system]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Start the JobTracker process. This is used only for debugging. As a rule,
JobTracker should be run as part of the DFS Namenode process.]]>
</doc>
</method>
<method name="getQueues" return="org.apache.hadoop.mapred.JobQueueInfo[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getQueueInfo" return="org.apache.hadoop.mapred.JobQueueInfo"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queue" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getJobsFromQueue" return="org.apache.hadoop.mapred.JobStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="queue" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="refreshServiceAcl"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[JobTracker is the central location for submitting and
tracking MR jobs in a network environment.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobTracker -->
<!-- start class org.apache.hadoop.mapred.JobTracker.IllegalStateException -->
<class name="JobTracker.IllegalStateException" extends="java.io.IOException"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobTracker.IllegalStateException" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[A client tried to submit a job before the Job Tracker was ready.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.JobTracker.IllegalStateException -->
<!-- start class org.apache.hadoop.mapred.JobTracker.State -->
<class name="JobTracker.State" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.JobTracker.State[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.JobTracker.State"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="INITIALIZING" type="org.apache.hadoop.mapred.JobTracker.State"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RUNNING" type="org.apache.hadoop.mapred.JobTracker.State"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.JobTracker.State -->
<!-- start class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
<class name="KeyValueLineRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="KeyValueLineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="getKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createKey" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="findSeparator" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="utf" type="byte[]"/>
<param name="start" type="int"/>
<param name="length" type="int"/>
<param name="sep" type="byte"/>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read key/value pair in a line.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class treats a line in the input as a key/value pair separated by a
separator character. The separator can be specified in config file
under the attribute name key.value.separator.in.input.line. The default
separator is the tab character ('\t').]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.KeyValueLineRecordReader -->
<!-- start class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
<class name="KeyValueTextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="KeyValueTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
Either linefeed or carriage-return is used to signal end of line. Each line
is divided into key and value parts by a separator byte. If no such byte
exists, the key will be the entire line and value will be empty.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.KeyValueTextInputFormat -->
<!-- start class org.apache.hadoop.mapred.LineRecordReader -->
<class name="LineRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use
{@link org.apache.hadoop.mapreduce.lib.input.LineRecordReader} instead.">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="LineRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<constructor name="LineRecordReader" type="java.io.InputStream, long, long, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="LineRecordReader" type="java.io.InputStream, long, long, org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="createKey" return="org.apache.hadoop.io.LongWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.LongWritable"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read a line.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the progress within the split]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Treats keys as offset in file and value as line.
@deprecated Use
{@link org.apache.hadoop.mapreduce.lib.input.LineRecordReader} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.LineRecordReader -->
<!-- start class org.apache.hadoop.mapred.LineRecordReader.LineReader -->
<class name="LineRecordReader.LineReader" extends="org.apache.hadoop.util.LineReader"
abstract="false"
static="true" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.util.LineReader} instead.">
<constructor name="LineRecordReader.LineReader" type="java.io.InputStream, org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<doc>
<![CDATA[A class that provides a line reader from an input stream.
@deprecated Use {@link org.apache.hadoop.util.LineReader} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.LineRecordReader.LineReader -->
<!-- start class org.apache.hadoop.mapred.MapFileOutputFormat -->
<class name="MapFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MapFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getReaders" return="org.apache.hadoop.io.MapFile.Reader[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Open the output generated by this format.]]>
</doc>
</method>
<method name="getEntry" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="readers" type="org.apache.hadoop.io.MapFile.Reader[]"/>
<param name="partitioner" type="org.apache.hadoop.mapred.Partitioner"/>
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<param name="value" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get an entry from output generated by this class.]]>
</doc>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes {@link MapFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MapFileOutputFormat -->
<!-- start interface org.apache.hadoop.mapred.Mapper -->
<interface name="Mapper" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead.">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<implements name="org.apache.hadoop.io.Closeable"/>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Maps a single input key/value pair into an intermediate key/value pair.
<p>Output pairs need not be of the same types as input pairs. A given
input pair may map to zero or many output pairs. Output pairs are
collected with calls to
{@link OutputCollector#collect(Object,Object)}.</p>
<p>Applications can use the {@link Reporter} provided to report progress
or just indicate that they are alive. In scenarios where the application
takes a significant amount of time to process individual key/value
pairs, this is crucial since the framework might assume that the task has
timed-out and kill that task. The other way of avoiding this is to set
<a href="{@docRoot}/../mapred-default.html#mapred.task.timeout">
mapred.task.timeout</a> to a high-enough value (or even zero for no
time-outs).</p>
@param key the input key.
@param value the input value.
@param output collects mapped keys and values.
@param reporter facility to report progress.]]>
</doc>
</method>
<doc>
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
<p>Maps are the individual tasks which transform input records into
intermediate records. The transformed intermediate records need not be of
the same type as the input records. A given input pair may map to zero or
many output pairs.</p>
<p>The Hadoop Map-Reduce framework spawns one map task for each
{@link InputSplit} generated by the {@link InputFormat} for the job.
<code>Mapper</code> implementations can access the {@link JobConf} for the
job via the {@link JobConfigurable#configure(JobConf)} and initialize
themselves. Similarly they can use the {@link Closeable#close()} method for
de-initialization.</p>
<p>The framework then calls
{@link #map(Object, Object, OutputCollector, Reporter)}
for each key/value pair in the <code>InputSplit</code> for that task.</p>
<p>All intermediate values associated with a given output key are
subsequently grouped by the framework, and passed to a {@link Reducer} to
determine the final output. Users can control the grouping by specifying
a <code>Comparator</code> via
{@link JobConf#setOutputKeyComparatorClass(Class)}.</p>
<p>The grouped <code>Mapper</code> outputs are partitioned per
<code>Reducer</code>. Users can control which keys (and hence records) go to
which <code>Reducer</code> by implementing a custom {@link Partitioner}.</p>
<p>Users can optionally specify a <code>combiner</code>, via
{@link JobConf#setCombinerClass(Class)}, to perform local aggregation of the
intermediate outputs, which helps to cut down the amount of data transferred
from the <code>Mapper</code> to the <code>Reducer</code>.</p>
<p>The intermediate, grouped outputs are always stored in
{@link SequenceFile}s. Applications can specify if and how the intermediate
outputs are to be compressed and which {@link CompressionCodec}s are to be
used via the <code>JobConf</code>.</p>
<p>If the job has
<a href="{@docRoot}/org/apache/hadoop/mapred/JobConf.html#ReducerNone">zero
reduces</a> then the output of the <code>Mapper</code> is directly written
to the {@link FileSystem} without grouping by keys.</p>
<p>Example:</p>
<p><blockquote><pre>
public class MyMapper&lt;K extends WritableComparable, V extends Writable&gt;
extends MapReduceBase implements Mapper&lt;K, V, K, V&gt; {
static enum MyCounters { NUM_RECORDS }
private String mapTaskId;
private String inputFile;
private int noRecords = 0;
public void configure(JobConf job) {
mapTaskId = job.get("mapred.task.id");
inputFile = job.get("map.input.file");
}
public void map(K key, V val,
OutputCollector&lt;K, V&gt; output, Reporter reporter)
throws IOException {
// Process the &lt;key, value&gt; pair (assume this takes a while)
// ...
// ...
// Let the framework know that we are alive, and kicking!
// reporter.progress();
// Process some more
// ...
// ...
// Increment the no. of &lt;key, value&gt; pairs processed
++noRecords;
// Increment counters
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
// Every 100 records update application-level status
if ((noRecords%100) == 0) {
reporter.setStatus(mapTaskId + " processed " + noRecords +
" from input-file: " + inputFile);
}
// Output the result
output.collect(key, val);
}
}
</pre></blockquote></p>
<p>Applications may write a custom {@link MapRunnable} to exert greater
control on map processing e.g. multi-threaded <code>Mapper</code>s etc.</p>
@see JobConf
@see InputFormat
@see Partitioner
@see Reducer
@see MapReduceBase
@see MapRunnable
@see SequenceFile
@deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Mapper -->
<!-- start class org.apache.hadoop.mapred.MapReduceBase -->
<class name="MapReduceBase" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Closeable"/>
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="MapReduceBase"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Default implementation that does nothing.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Default implementation that does nothing.]]>
</doc>
</method>
<doc>
<![CDATA[Base class for {@link Mapper} and {@link Reducer} implementations.
<p>Provides default no-op implementations for a few methods; most non-trivial
applications need to override some of them.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MapReduceBase -->
<!-- start class org.apache.hadoop.mapred.MapReducePolicyProvider -->
<class name="MapReducePolicyProvider" extends="org.apache.hadoop.security.authorize.PolicyProvider"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MapReducePolicyProvider"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getServices" return="org.apache.hadoop.security.authorize.Service[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[{@link PolicyProvider} for Map-Reduce protocols.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MapReducePolicyProvider -->
<!-- start interface org.apache.hadoop.mapred.MapRunnable -->
<interface name="MapRunnable" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead.">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Start mapping input <tt>&lt;key, value&gt;</tt> pairs.
<p>Mapping of input records to output records is complete when this method
returns.</p>
@param input the {@link RecordReader} to read the input records.
@param output the {@link OutputCollector} to collect the output records.
@param reporter {@link Reporter} to report progress, status-updates etc.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[Expert: Generic interface for {@link Mapper}s.
<p>Custom implementations of <code>MapRunnable</code> can exert greater
control on map processing e.g. multi-threaded, asynchronous mappers etc.</p>
@see Mapper
@deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.MapRunnable -->
<!-- start class org.apache.hadoop.mapred.MapRunner -->
<class name="MapRunner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
<constructor name="MapRunner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getMapper" return="org.apache.hadoop.mapred.Mapper"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Default {@link MapRunnable} implementation.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MapRunner -->
<!-- start class org.apache.hadoop.mapred.MultiFileInputFormat -->
<class name="MultiFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead">
<constructor name="MultiFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An abstract {@link InputFormat} that returns {@link MultiFileSplit}'s
in {@link #getSplits(JobConf, int)} method. Splits are constructed from
the files under the input paths. Each split returned contains <i>nearly</i>
equal content length. <br>
Subclasses implement {@link #getRecordReader(InputSplit, JobConf, Reporter)}
to construct <code>RecordReader</code>'s for <code>MultiFileSplit</code>'s.
@see MultiFileSplit
@deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} instead]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MultiFileInputFormat -->
<!-- start class org.apache.hadoop.mapred.MultiFileSplit -->
<class name="MultiFileSplit" extends="org.apache.hadoop.mapred.lib.CombineFileSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead">
<constructor name="MultiFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A sub-collection of input files. Unlike {@link FileSplit}, MultiFileSplit
class does not represent a split of a file, but a split of input files
into smaller sets. The atomic unit of split is a file. <br>
MultiFileSplit can be used to implement {@link RecordReader}'s, with
reading one record per file.
@see FileSplit
@see MultiFileInputFormat
@deprecated Use {@link org.apache.hadoop.mapred.lib.CombineFileSplit} instead]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.MultiFileSplit -->
<!-- start interface org.apache.hadoop.mapred.OutputCollector -->
<interface name="OutputCollector" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="collect"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Adds a key/value pair to the output.
@param key the key to collect.
@param value the value to collect.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[Collects the <code>&lt;key, value&gt;</code> pairs output by {@link Mapper}s
and {@link Reducer}s.
<p><code>OutputCollector</code> is the generalization of the facility
provided by the Map-Reduce framework to collect data output by either the
<code>Mapper</code> or the <code>Reducer</code> i.e. intermediate outputs
or the output of the job.</p>]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.OutputCollector -->
<!-- start class org.apache.hadoop.mapred.OutputCommitter -->
<class name="OutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.">
<constructor name="OutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setupJob"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For the framework to setup the job output during initialization
@param jobContext Context of the job whose output is being written.
@throws IOException if temporary output could not be created]]>
</doc>
</method>
<method name="cleanupJob"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapred.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For cleaning up the job's output after job completion
@param jobContext Context of the job whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="setupTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Sets up output for the task.
@param taskContext Context of the task whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check whether task needs a commit
@param taskContext
@return true/false
@throws IOException]]>
</doc>
</method>
<method name="commitTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[To promote the task's temporary output to final output location
The task's output is moved to the job's output directory.
@param taskContext Context of the task whose output is being written.
@throws IOException if commit is not successful]]>
</doc>
</method>
<method name="abortTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapred.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Discard the task output
@param taskContext
@throws IOException]]>
</doc>
</method>
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This method implements the new interface by calling the old method. Note
that the input types are different between the new and old apis and this
is a bridge between the two.]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
the job to:</p>
<ol>
<li>
Setup the job during initialization. For example, create the temporary
output directory for the job during the initialization of the job.
</li>
<li>
Cleanup the job after the job completion. For example, remove the
temporary output directory after the job completion.
</li>
<li>
Setup the task temporary output.
</li>
<li>
Check whether a task needs a commit. This is to avoid the commit
procedure if a task does not need commit.
</li>
<li>
Commit of the task output.
</li>
<li>
Discard the task commit.
</li>
</ol>
@see FileOutputCommitter
@see JobContext
@see TaskAttemptContext
@deprecated Use {@link org.apache.hadoop.mapreduce.OutputCommitter} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.OutputCommitter -->
<!-- start interface org.apache.hadoop.mapred.OutputFormat -->
<interface name="OutputFormat" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.">
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the {@link RecordWriter} for the given job.
@param ignored
@param job configuration for the job whose output is being written.
@param name the unique name for this part of the output.
@param progress mechanism for reporting progress while writing to file.
@return a {@link RecordWriter} to write the output for the job.
@throws IOException]]>
</doc>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check for validity of the output-specification for the job.
<p>This is to validate the output specification for the job when the
job is submitted. Typically checks that it does not already exist,
throwing an exception when it already exists, so that output is not
overwritten.</p>
@param ignored
@param job job configuration.
@throws IOException when output should not be attempted]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the output-specification of the job. For example, check that the
output directory doesn't already exist.
</li>
<li>
Provide the {@link RecordWriter} implementation to be used to write out
the output files of the job. Output files are stored in a
{@link FileSystem}.
</li>
</ol>
@see RecordWriter
@see JobConf
@deprecated Use {@link org.apache.hadoop.mapreduce.OutputFormat} instead.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.OutputFormat -->
<!-- start class org.apache.hadoop.mapred.OutputLogFilter -->
<class name="OutputLogFilter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.fs.PathFilter"/>
<constructor name="OutputLogFilter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="accept" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="path" type="org.apache.hadoop.fs.Path"/>
</method>
<doc>
<![CDATA[This class filters log files from the given directory.
It doesn't accept paths having _logs.
This can be used to list paths of output directory as follows:
Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir,
new OutputLogFilter()));]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.OutputLogFilter -->
<!-- start interface org.apache.hadoop.mapred.Partitioner -->
<interface name="Partitioner" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="numPartitions" type="int"/>
<doc>
<![CDATA[Get the partition number for a given key (hence record) given the total
number of partitions i.e. number of reduce-tasks for the job.
<p>Typically a hash function on all or a subset of the key.</p>
@param key the key to be partitioned.
@param value the entry value.
@param numPartitions the total number of partitions.
@return the partition number for the <code>key</code>.]]>
</doc>
</method>
<doc>
<![CDATA[Partitions the key space.
<p><code>Partitioner</code> controls the partitioning of the keys of the
intermediate map-outputs. The key (or a subset of the key) is used to derive
the partition, typically by a hash function. The total number of partitions
is the same as the number of reduce tasks for the job. Hence this controls
which of the <code>m</code> reduce tasks the intermediate key (and hence the
record) is sent for reduction.</p>
@see Reducer
@deprecated Use {@link org.apache.hadoop.mapreduce.Partitioner} instead.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Partitioner -->
<!-- start interface org.apache.hadoop.mapred.RawKeyValueIterator -->
<interface name="RawKeyValueIterator" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getKey" return="org.apache.hadoop.io.DataInputBuffer"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the current raw key.
@return Gets the current raw key as a DataInputBuffer
@throws IOException]]>
</doc>
</method>
<method name="getValue" return="org.apache.hadoop.io.DataInputBuffer"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the current raw value.
@return Gets the current raw value as a DataInputBuffer
@throws IOException]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Sets up the current key and value (for getKey and getValue).
@return <code>true</code> if there exists a key/value,
<code>false</code> otherwise.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Closes the iterator so that the underlying streams can be closed.
@throws IOException]]>
</doc>
</method>
<method name="getProgress" return="org.apache.hadoop.util.Progress"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Gets the Progress object; this has a float (0.0 - 1.0)
indicating the bytes processed by the iterator so far]]>
</doc>
</method>
<doc>
<![CDATA[<code>RawKeyValueIterator</code> is an iterator used to iterate over
the raw keys and values during sort/merge of intermediate data.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.RawKeyValueIterator -->
<!-- start interface org.apache.hadoop.mapred.RecordReader -->
<interface name="RecordReader" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Reads the next key/value pair from the input for processing.
@param key the key to read data into
@param value the value to read data into
@return true iff a key/value was read, false if at EOF]]>
</doc>
</method>
<method name="createKey" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create an object of the appropriate type to be used as a key.
@return a new key object.]]>
</doc>
</method>
<method name="createValue" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create an object of the appropriate type to be used as a value.
@return a new value object.]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns the current position in the input.
@return the current position in the input.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close this {@link RecordReader} to future operations.
@throws IOException]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[How much of the input has the {@link RecordReader} consumed, i.e.
how much has been processed so far?
@return progress from <code>0.0</code> to <code>1.0</code>.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RecordReader</code> reads &lt;key, value&gt; pairs from an
{@link InputSplit}.
<p><code>RecordReader</code>, typically, converts the byte-oriented view of
the input, provided by the <code>InputSplit</code>, and presents a
record-oriented view for the {@link Mapper} &amp; {@link Reducer} tasks for
processing. It thus assumes the responsibility of processing record
boundaries and presenting the tasks with keys and values.</p>
@see InputSplit
@see InputFormat]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.RecordReader -->
<!-- start interface org.apache.hadoop.mapred.RecordWriter -->
<interface name="RecordWriter" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Writes a key/value pair.
@param key the key to write.
@param value the value to write.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close this <code>RecordWriter</code> to future operations.
@param reporter facility to report progress.
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RecordWriter</code> writes the output &lt;key, value&gt; pairs
to an output file.
<p><code>RecordWriter</code> implementations write the job outputs to the
{@link FileSystem}.</p>
@see OutputFormat]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.RecordWriter -->
<!-- start interface org.apache.hadoop.mapred.Reducer -->
<interface name="Reducer" abstract="true"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead.">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<implements name="org.apache.hadoop.io.Closeable"/>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[<i>Reduces</i> values for a given key.
<p>The framework calls this method for each
<code>&lt;key, (list of values)&gt;</code> pair in the grouped inputs.
Output values must be of the same type as input values. Input keys must
not be altered. The framework will <b>reuse</b> the key and value objects
that are passed into the reduce, therefore the application should clone
the objects they want to keep a copy of. In many cases, all values are
combined into zero or one value.
</p>
<p>Output pairs are collected with calls to
{@link OutputCollector#collect(Object,Object)}.</p>
<p>Applications can use the {@link Reporter} provided to report progress
or just indicate that they are alive. In scenarios where the application
takes a significant amount of time to process individual key/value
pairs, this is crucial since the framework might assume that the task has
timed-out and kill that task. The other way of avoiding this is to set
<a href="{@docRoot}/../mapred-default.html#mapred.task.timeout">
mapred.task.timeout</a> to a high-enough value (or even zero for no
time-outs).</p>
@param key the key.
@param values the list of values to reduce.
@param output to collect keys and combined values.
@param reporter facility to report progress.]]>
</doc>
</method>
<doc>
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
values.
<p>The number of <code>Reducer</code>s for the job is set by the user via
{@link JobConf#setNumReduceTasks(int)}. <code>Reducer</code> implementations
can access the {@link JobConf} for the job via the
{@link JobConfigurable#configure(JobConf)} method and initialize themselves.
Similarly they can use the {@link Closeable#close()} method for
de-initialization.</p>
<p><code>Reducer</code> has 3 primary phases:</p>
<ol>
<li>
<h4 id="Shuffle">Shuffle</h4>
<p><code>Reducer</code> is input the grouped output of a {@link Mapper}.
In this phase the framework, for each <code>Reducer</code>, fetches the
relevant partition of the output of all the <code>Mapper</code>s, via HTTP.
</p>
</li>
<li>
<h4 id="Sort">Sort</h4>
<p>The framework groups <code>Reducer</code> inputs by <code>key</code>s
(since different <code>Mapper</code>s may have output the same key) in this
stage.</p>
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
being fetched they are merged.</p>
<h5 id="SecondarySort">SecondarySort</h5>
<p>If equivalence rules for keys while grouping the intermediates are
different from those for grouping keys before reduction, then one may
specify a <code>Comparator</code> via
{@link JobConf#setOutputValueGroupingComparator(Class)}. Since
{@link JobConf#setOutputKeyComparatorClass(Class)} can be used to
control how intermediate keys are grouped, these can be used in conjunction
to simulate <i>secondary sort on values</i>.</p>
For example, say that you want to find duplicate web pages and tag them
all with the url of the "best" known example. You would set up the job
like:
<ul>
<li>Map Input Key: url</li>
<li>Map Input Value: document</li>
<li>Map Output Key: document checksum, url pagerank</li>
<li>Map Output Value: url</li>
<li>Partitioner: by checksum</li>
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
<li>OutputValueGroupingComparator: by checksum</li>
</ul>
</li>
<li>
<h4 id="Reduce">Reduce</h4>
<p>In this phase the
{@link #reduce(Object, Iterator, OutputCollector, Reporter)}
method is called for each <code>&lt;key, (list of values)&gt;</code> pair in
the grouped inputs.</p>
<p>The output of the reduce task is typically written to the
{@link FileSystem} via
{@link OutputCollector#collect(Object, Object)}.</p>
</li>
</ol>
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
<p>Example:</p>
<p><blockquote><pre>
public class MyReducer&lt;K extends WritableComparable, V extends Writable&gt;
extends MapReduceBase implements Reducer&lt;K, V, K, V&gt; {
static enum MyCounters { NUM_RECORDS }
private String reduceTaskId;
private int noKeys = 0;
public void configure(JobConf job) {
reduceTaskId = job.get("mapred.task.id");
}
public void reduce(K key, Iterator&lt;V&gt; values,
OutputCollector&lt;K, V&gt; output,
Reporter reporter)
throws IOException {
// Process
int noValues = 0;
while (values.hasNext()) {
V value = values.next();
// Increment the no. of values for this key
++noValues;
// Process the &lt;key, value&gt; pair (assume this takes a while)
// ...
// ...
// Let the framework know that we are alive, and kicking!
if ((noValues%10) == 0) {
reporter.progress();
}
// Process some more
// ...
// ...
// Output the &lt;key, value&gt;
output.collect(key, value);
}
// Increment the no. of &lt;key, list of values&gt; pairs processed
++noKeys;
// Increment counters
reporter.incrCounter(MyCounters.NUM_RECORDS, 1);
// Every 100 keys update application-level status
if ((noKeys%100) == 0) {
reporter.setStatus(reduceTaskId + " processed " + noKeys);
}
}
}
</pre></blockquote></p>
@see Mapper
@see Partitioner
@see Reporter
@see MapReduceBase
@deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Reducer -->
<!-- start interface org.apache.hadoop.mapred.Reporter -->
<interface name="Reporter" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Progressable"/>
<method name="setStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="java.lang.String"/>
<doc>
<![CDATA[Set the status description for the task.
@param status brief description of the current status.]]>
</doc>
</method>
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.Enum"/>
<doc>
<![CDATA[Get the {@link Counter} identified by the given enum name.
@param name counter name
@return the <code>Counter</code> identified by the given name.]]>
</doc>
</method>
<method name="getCounter" return="org.apache.hadoop.mapred.Counters.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Get the {@link Counter} of the given group with the given name.
@param group counter group
@param name counter name
@return the <code>Counter</code> of the given group/name.]]>
</doc>
</method>
<method name="incrCounter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the counter identified by the key, which can be of
any {@link Enum} type, by the specified amount.
@param key key to identify the counter to be incremented. The key can be
any <code>Enum</code>.
@param amount A non-negative amount by which the counter is to
be incremented.]]>
</doc>
</method>
<method name="incrCounter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="counter" type="java.lang.String"/>
<param name="amount" type="long"/>
<doc>
<![CDATA[Increments the counter identified by the group and counter name
by the specified amount.
@param group name to identify the group of the counter to be incremented.
@param counter name to identify the counter within the group.
@param amount A non-negative amount by which the counter is to
be incremented.]]>
</doc>
</method>
<method name="getInputSplit" return="org.apache.hadoop.mapred.InputSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="UnsupportedOperationException" type="java.lang.UnsupportedOperationException"/>
<doc>
<![CDATA[Get the {@link InputSplit} object for a map.
@return the <code>InputSplit</code> that the map is reading from.
@throws UnsupportedOperationException if called outside a mapper]]>
</doc>
</method>
<field name="NULL" type="org.apache.hadoop.mapred.Reporter"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A constant of Reporter type that does nothing.]]>
</doc>
</field>
<doc>
<![CDATA[A facility for Map-Reduce applications to report progress and update
counters, status information etc.
<p>{@link Mapper} and {@link Reducer} can use the <code>Reporter</code>
provided to report progress or just indicate that they are alive. In
scenarios where the application takes a significant amount of time to
process individual key/value pairs, this is crucial since the framework
might assume that the task has timed-out and kill that task.</p>
<p>Applications can also update {@link Counters} via the provided
<code>Reporter</code>.</p>
@see Progressable
@see Counters]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.Reporter -->
<!-- start interface org.apache.hadoop.mapred.RunningJob -->
<interface name="RunningJob" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="getID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the job identifier.
@return the job identifier.]]>
</doc>
</method>
<method name="getJobID" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="This method is deprecated and will be removed. Applications should
rather use {@link #getID()}.">
<doc>
<![CDATA[@deprecated This method is deprecated and will be removed. Applications should
rather use {@link #getID()}.]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the name of the job.
@return the name of the job.]]>
</doc>
</method>
<method name="getJobFile" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the path of the submitted job configuration.
@return the path of the submitted job configuration.]]>
</doc>
</method>
<method name="getTrackingURL" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the URL where some job progress information will be displayed.
@return the URL where some job progress information will be displayed.]]>
</doc>
</method>
<method name="mapProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
and 1.0. When all map tasks have completed, the function returns 1.0.
@return the progress of the job's map-tasks.
@throws IOException]]>
</doc>
</method>
<method name="reduceProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
and 1.0. When all reduce tasks have completed, the function returns 1.0.
@return the progress of the job's reduce-tasks.
@throws IOException]]>
</doc>
</method>
<method name="cleanupProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's cleanup-tasks, as a float between 0.0
and 1.0. When all cleanup tasks have completed, the function returns 1.0.
@return the progress of the job's cleanup-tasks.
@throws IOException]]>
</doc>
</method>
<method name="setupProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's setup-tasks, as a float between 0.0
and 1.0. When all setup tasks have completed, the function returns 1.0.
@return the progress of the job's setup-tasks.
@throws IOException]]>
</doc>
</method>
<method name="isComplete" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job is finished or not.
This is a non-blocking call.
@return <code>true</code> if the job is complete, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="isSuccessful" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job completed successfully.
@return <code>true</code> if the job succeeded, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="waitForCompletion"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Blocks until the job is complete.
@throws IOException]]>
</doc>
</method>
<method name="getJobState" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns the current state of the Job.
@return the current state of the job; see {@link JobStatus}.
@throws IOException]]>
</doc>
</method>
<method name="killJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill the running job. Blocks until all job tasks have been
killed as well. If the job is no longer running, it simply returns.
@throws IOException]]>
</doc>
</method>
<method name="setJobPriority"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="priority" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the priority of a running job.
@param priority the new priority for the job.
@throws IOException]]>
</doc>
</method>
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="startFrom" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get events indicating completion (success/failure) of component tasks.
@param startFrom index to start fetching events from
@return an array of {@link TaskCompletionEvent}s
@throws IOException]]>
</doc>
</method>
<method name="killTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="shouldFail" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill indicated task attempt.
@param taskId the id of the task to be terminated.
@param shouldFail if true the task is failed and added to failed tasks
list, otherwise it is just killed, w/o affecting
job failure status.
@throws IOException]]>
</doc>
</method>
<method name="killTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="Applications should rather use {@link #killTask(TaskAttemptID, boolean)}">
<param name="taskId" type="java.lang.String"/>
<param name="shouldFail" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@deprecated Applications should rather use {@link #killTask(TaskAttemptID, boolean)}]]>
</doc>
</method>
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the counters for this job.
@return the counters for this job.
@throws IOException]]>
</doc>
</method>
<method name="getTaskDiagnostics" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the diagnostic messages for a given task attempt.
@param taskid the id of the task attempt
@return the list of diagnostic messages for the task
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RunningJob</code> is the user-interface to query for details on a
running Map-Reduce job.
<p>Clients can get hold of <code>RunningJob</code> via the {@link JobClient}
and then query the running-job for details such as name, configuration,
progress etc.</p>
@see JobClient]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.RunningJob -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
<class name="SequenceFileAsBinaryInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsBinaryInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[InputFormat reading keys, values from SequenceFiles in binary (raw)
format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
<class name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="createKey" return="org.apache.hadoop.io.BytesWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="org.apache.hadoop.io.BytesWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getKeyClassName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Retrieve the name of the key class for this SequenceFile.
@see org.apache.hadoop.io.SequenceFile.Reader#getKeyClassName]]>
</doc>
</method>
<method name="getValueClassName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Retrieve the name of the value class for this SequenceFile.
@see org.apache.hadoop.io.SequenceFile.Reader#getValueClassName]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.BytesWritable"/>
<param name="val" type="org.apache.hadoop.io.BytesWritable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read raw bytes from a SequenceFile.]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the progress within the input split
@return 0.0 to 1.0 of the input byte range]]>
</doc>
</method>
<doc>
<![CDATA[Read records from a SequenceFile as binary (raw) bytes.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryInputFormat.SequenceFileAsBinaryRecordReader -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
<class name="SequenceFileAsBinaryOutputFormat" extends="org.apache.hadoop.mapred.SequenceFileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsBinaryOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setSequenceFileOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the key class for the {@link SequenceFile}
<p>This allows the user to specify the key class to be different
from the actual class ({@link BytesWritable}) used for writing </p>
@param conf the {@link JobConf} to modify
@param theClass the SequenceFile output key class.]]>
</doc>
</method>
<method name="setSequenceFileOutputValueClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="theClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the value class for the {@link SequenceFile}
<p>This allows the user to specify the value class to be different
from the actual class ({@link BytesWritable}) used for writing </p>
@param conf the {@link JobConf} to modify
@param theClass the SequenceFile output value class.]]>
</doc>
</method>
<method name="getSequenceFileOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the key class for the {@link SequenceFile}
@return the key class of the {@link SequenceFile}]]>
</doc>
</method>
<method name="getSequenceFileOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the value class for the {@link SequenceFile}
@return the value class of the {@link SequenceFile}]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes keys, values to
{@link SequenceFile}s in binary (raw) format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
<class name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.SequenceFile.ValueBytes"/>
<constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="SequenceFileAsBinaryOutputFormat.WritableValueBytes" type="org.apache.hadoop.io.BytesWritable"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="value" type="org.apache.hadoop.io.BytesWritable"/>
</method>
<method name="writeUncompressedBytes"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="outStream" type="java.io.DataOutputStream"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="writeCompressedBytes"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="outStream" type="java.io.DataOutputStream"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getSize" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Inner class used for appendRaw]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsBinaryOutputFormat.WritableValueBytes -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
<class name="SequenceFileAsTextInputFormat" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileAsTextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class is similar to SequenceFileInputFormat, except it generates SequenceFileAsTextRecordReader
which converts the input keys and values to their String forms by calling toString() method.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextInputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
<class name="SequenceFileAsTextRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="SequenceFileAsTextRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="createKey" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read key/value pair in a line.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class converts the input keys and values to their String forms by calling the
toString() method. This class is to SequenceFileAsTextInputFormat what LineRecordReader
is to TextInputFormat.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileAsTextRecordReader -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter -->
<class name="SequenceFileInputFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFilter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a record reader for the given split
@param split file split
@param job job configuration
@param reporter reporter who sends report to task tracker
@return RecordReader]]>
</doc>
</method>
<method name="setFilterClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="filterClass" type="java.lang.Class"/>
<doc>
<![CDATA[set the filter class
@param conf application configuration
@param filterClass filter class]]>
</doc>
</method>
<doc>
<![CDATA[A class that allows a map/red job to work on a sample of sequence files.
The sample is decided by the filter class set by the job.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter -->
<!-- start interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter -->
<interface name="SequenceFileInputFilter.Filter" abstract="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<method name="accept" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<doc>
<![CDATA[filter function
Decide if a record should be filtered or not
@param key record key
@return true if a record is accepted; return false otherwise]]>
</doc>
</method>
<doc>
<![CDATA[filter interface]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.SequenceFileInputFilter.Filter -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase -->
<class name="SequenceFileInputFilter.FilterBase" extends="java.lang.Object"
abstract="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.SequenceFileInputFilter.Filter"/>
<constructor name="SequenceFileInputFilter.FilterBase"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[base class for Filters]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter -->
<class name="SequenceFileInputFilter.MD5Filter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFilter.MD5Filter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setFrequency"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="frequency" type="int"/>
<doc>
<![CDATA[set the filtering frequency in configuration
@param conf configuration
@param frequency filtering frequency]]>
</doc>
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[configure the filter according to configuration
@param conf configuration]]>
</doc>
</method>
<method name="accept" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<doc>
<![CDATA[Filtering method
If MD5(key) % frequency==0, return true; otherwise return false
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
</doc>
</method>
<field name="MD5_LEN" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class returns a set of records by examining the MD5 digest of its
key against a filtering frequency <i>f</i>. The filtering criterion is
MD5(key) % f == 0.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.MD5Filter -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter -->
<class name="SequenceFileInputFilter.PercentFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFilter.PercentFilter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setFrequency"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="frequency" type="int"/>
<doc>
<![CDATA[set the frequency and store it in conf
@param conf configuration
@param frequency filtering frequency]]>
</doc>
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[configure the filter by checking the configuration
@param conf configuration]]>
</doc>
</method>
<method name="accept" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<doc>
<![CDATA[Filtering method
If record# % frequency==0, return true; otherwise return false
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
</doc>
</method>
<doc>
<![CDATA[This class returns a percentage of records.
The percentage is determined by a filtering frequency <i>f</i> using
the criteria record# % f == 0.
For example, if the frequency is 10, one out of 10 records is returned.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.PercentFilter -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter -->
<class name="SequenceFileInputFilter.RegexFilter" extends="org.apache.hadoop.mapred.SequenceFileInputFilter.FilterBase"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFilter.RegexFilter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setPattern"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="regex" type="java.lang.String"/>
<exception name="PatternSyntaxException" type="java.util.regex.PatternSyntaxException"/>
<doc>
<![CDATA[Defines the filtering regex and stores it in conf
@param conf where the regex is set
@param regex regex used as a filter]]>
</doc>
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[configure the Filter by checking the configuration]]>
</doc>
</method>
<method name="accept" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<doc>
<![CDATA[Filtering method
If key matches the regex, return true; otherwise return false
@see org.apache.hadoop.mapred.SequenceFileInputFilter.Filter#accept(Object)]]>
</doc>
</method>
<doc>
<![CDATA[Records filter by matching key to regex]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFilter.RegexFilter -->
<!-- start class org.apache.hadoop.mapred.SequenceFileInputFormat -->
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use
{@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat}
instead.">
<constructor name="SequenceFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="listStatus" return="org.apache.hadoop.fs.FileStatus[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.
@deprecated Use
{@link org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileInputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use
{@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat}
instead.">
<constructor name="SequenceFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getReaders" return="org.apache.hadoop.io.SequenceFile.Reader[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Open the output generated by this format.]]>
</doc>
</method>
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
@param conf the {@link JobConf}
@return the {@link CompressionType} for the output {@link SequenceFile},
defaulting to {@link CompressionType#RECORD}]]>
</doc>
</method>
<method name="setOutputCompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
<doc>
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
@param conf the {@link JobConf} to modify
@param style the {@link CompressionType} for the output
{@link SequenceFile}]]>
</doc>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.
@deprecated Use
{@link org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileOutputFormat -->
<!-- start class org.apache.hadoop.mapred.SequenceFileRecordReader -->
<class name="SequenceFileRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="SequenceFileRecordReader" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapred.FileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="getKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The class of key that must be passed to {@link
#next(Object, Object)}.]]>
</doc>
</method>
<method name="getValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The class of value that must be passed to {@link
#next(Object, Object)}.]]>
</doc>
</method>
<method name="createKey" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getCurrentValue"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the progress within the input split
@return 0.0 to 1.0 of the input byte range]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="seek"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="pos" type="long"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="conf" type="org.apache.hadoop.conf.Configuration"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SequenceFileRecordReader -->
<!-- start class org.apache.hadoop.mapred.SkipBadRecords -->
<class name="SkipBadRecords" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SkipBadRecords"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getAttemptsToStartSkipping" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the number of Task attempts AFTER which skip mode
will be kicked off. When skip mode is kicked off, the
task reports the range of records which it will process
next to the TaskTracker, so that on failures, the TT knows which
ones are possibly the bad records. On further executions,
those are skipped.
Default value is 2.
@param conf the configuration
@return attemptsToStartSkipping number of task attempts]]>
</doc>
</method>
<method name="setAttemptsToStartSkipping"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="attemptsToStartSkipping" type="int"/>
<doc>
<![CDATA[Set the number of Task attempts AFTER which skip mode
will be kicked off. When skip mode is kicked off, the
task reports the range of records which it will process
next to the TaskTracker, so that on failures, the TT knows which
ones are possibly the bad records. On further executions,
those are skipped.
Default value is 2.
@param conf the configuration
@param attemptsToStartSkipping number of task attempts]]>
</doc>
</method>
<method name="getAutoIncrMapperProcCount" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the flag which if set to true,
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
by MapRunner after invoking the map function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@return <code>true</code> if auto increment
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setAutoIncrMapperProcCount"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="autoIncr" type="boolean"/>
<doc>
<![CDATA[Set the flag which if set to true,
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS} is incremented
by MapRunner after invoking the map function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@param autoIncr whether to auto increment
{@link SkipBadRecords#COUNTER_MAP_PROCESSED_RECORDS}.]]>
</doc>
</method>
<method name="getAutoIncrReducerProcCount" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the flag which if set to true,
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
by framework after invoking the reduce function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@return <code>true</code> if auto increment
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.
<code>false</code> otherwise.]]>
</doc>
</method>
<method name="setAutoIncrReducerProcCount"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="autoIncr" type="boolean"/>
<doc>
<![CDATA[Set the flag which if set to true,
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS} is incremented
by framework after invoking the reduce function. This value must be set to
false for applications which process the records asynchronously
or buffer the input records. For example streaming.
In such cases applications should increment this counter on their own.
Default value is true.
@param conf the configuration
@param autoIncr whether to auto increment
{@link SkipBadRecords#COUNTER_REDUCE_PROCESSED_GROUPS}.]]>
</doc>
</method>
<method name="getSkipOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the directory to which skipped records are written. By default it is
the sub directory of the output _logs directory.
User can stop writing skipped records by setting the value null.
@param conf the configuration.
@return path skip output directory. Null is returned if this is not set
and output directory is also not set.]]>
</doc>
</method>
<method name="setSkipOutputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the directory to which skipped records are written. By default it is
the sub directory of the output _logs directory.
User can stop writing skipped records by setting the value null.
@param conf the configuration.
@param path skip output directory path]]>
</doc>
</method>
<method name="getMapperMaxSkipRecords" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the number of acceptable skip records surrounding the bad record PER
bad record in mapper. The number includes the bad record as well.
To turn the feature of detection/skipping of bad records off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that framework need not try to
narrow down. Whatever records (depends on application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@return maxSkipRecs acceptable skip records.]]>
</doc>
</method>
<method name="setMapperMaxSkipRecords"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="maxSkipRecs" type="long"/>
<doc>
<![CDATA[Set the number of acceptable skip records surrounding the bad record PER
bad record in mapper. The number includes the bad record as well.
To turn the feature of detection/skipping of bad records off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that framework need not try to
narrow down. Whatever records (depends on application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@param maxSkipRecs acceptable skip records.]]>
</doc>
</method>
<method name="getReducerMaxSkipGroups" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[Get the number of acceptable skip groups surrounding the bad group PER
bad group in reducer. The number includes the bad group as well.
To turn the feature of detection/skipping of bad groups off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that framework need not try to
narrow down. Whatever groups (depends on application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@return maxSkipGrps acceptable skip groups.]]>
</doc>
</method>
<method name="setReducerMaxSkipGroups"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="maxSkipGrps" type="long"/>
<doc>
<![CDATA[Set the number of acceptable skip groups surrounding the bad group PER
bad group in reducer. The number includes the bad group as well.
To turn the feature of detection/skipping of bad groups off, set the
value to 0.
The framework tries to narrow down the skipped range by retrying
until this threshold is met OR all attempts get exhausted for this task.
Set the value to Long.MAX_VALUE to indicate that framework need not try to
narrow down. Whatever groups (depends on application) get skipped are
acceptable.
Default value is 0.
@param conf the configuration
@param maxSkipGrps acceptable skip groups.]]>
</doc>
</method>
<field name="COUNTER_GROUP" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Special counters which are written by the application and are
used by the framework for detecting bad records. For detecting bad records
these counters must be incremented by the application.]]>
</doc>
</field>
<field name="COUNTER_MAP_PROCESSED_RECORDS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Number of processed map records.
@see SkipBadRecords#getAutoIncrMapperProcCount(Configuration)]]>
</doc>
</field>
<field name="COUNTER_REDUCE_PROCESSED_GROUPS" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Number of processed reduce groups.
@see SkipBadRecords#getAutoIncrReducerProcCount(Configuration)]]>
</doc>
</field>
<doc>
<![CDATA[Utility class for skip bad records functionality. It contains various
settings related to skipping of bad records.
<p>Hadoop provides an optional mode of execution in which the bad records
are detected and skipped in further attempts.</p>
<p>This feature can be used when map/reduce tasks crash deterministically on
certain input. This happens due to bugs in the map/reduce function. The usual
course would be to fix these bugs. But sometimes this is not possible;
perhaps the bug is in third party libraries for which the source code is
not available. Due to this, the task never reaches completion even with
multiple attempts and complete data for that task is lost.</p>
<p>With this feature, only a small portion of data is lost surrounding
the bad record, which may be acceptable for some user applications.
see {@link SkipBadRecords#setMapperMaxSkipRecords(Configuration, long)}</p>
<p>The skipping mode gets kicked off after a certain number of failures;
see {@link SkipBadRecords#setAttemptsToStartSkipping(Configuration, int)}</p>
<p>In the skipping mode, the map/reduce task maintains the record range which
is getting processed at all times. Before giving the input to the
map/reduce function, it sends this record range to the Task tracker.
If task crashes, the Task tracker knows which one was the last reported
range. On further attempts that range gets skipped.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.SkipBadRecords -->
<!-- start class org.apache.hadoop.mapred.TaskAttemptContext -->
<class name="TaskAttemptContext" extends="org.apache.hadoop.mapreduce.TaskAttemptContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext}
instead.">
<method name="getTaskAttemptID" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the taskAttemptID.
@return TaskAttemptID]]>
</doc>
</method>
<method name="getProgressible" return="org.apache.hadoop.util.Progressable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="progress"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[@deprecated Use {@link org.apache.hadoop.mapreduce.TaskAttemptContext}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskAttemptContext -->
<!-- start class org.apache.hadoop.mapred.TaskAttemptID -->
<class name="TaskAttemptID" extends="org.apache.hadoop.mapreduce.TaskAttemptID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapred.TaskID, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
@param taskId TaskID that this task belongs to
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param taskId taskId number
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="downgrade" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="old" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<doc>
<![CDATA[Downgrade a new TaskAttemptID to an old one
@param old the new id
@return either old or a new TaskAttemptID constructed to match old]]>
</doc>
</method>
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="read" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a TaskAttemptID object from given string
@return constructed TaskAttemptID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<method name="getTaskAttemptIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<param name="isMap" type="java.lang.Boolean"/>
<param name="taskId" type="java.lang.Integer"/>
<param name="attemptId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches task attempt IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>all task attempt IDs</i>
of <i>any jobtracker</i>, in <i>any job</i>, of the <i>first
map task</i>, we would use :
<pre>
TaskAttemptID.getTaskAttemptIDsPattern(null, null, true, 1, null);
</pre>
which will return :
<pre> "attempt_[^_]*_[0-9]*_m_000001_[0-9]*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@param isMap whether the tip is a map, or null
@param taskId taskId number, or null
@param attemptId the task attempt number, or null
@return a regex pattern matching TaskAttemptIDs]]>
</doc>
</method>
<doc>
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
a task attempt. Each task attempt is one particular instance of a Map or
Reduce Task identified by its TaskID.
TaskAttemptID consists of 2 parts. First part is the
{@link TaskID}, that this TaskAttemptID belongs to.
Second part is the task attempt number. <br>
An example TaskAttemptID is :
<code>attempt_200707121733_0003_m_000005_0</code> , which represents the
zeroth task attempt for the fifth map task in the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskAttemptID strings,
but rather use appropriate constructors or the {@link #forName(String)}
method.
@see JobID
@see TaskID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskAttemptID -->
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent -->
<class name="TaskCompletionEvent" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="TaskCompletionEvent"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default constructor for Writable.]]>
</doc>
</constructor>
<constructor name="TaskCompletionEvent" type="int, org.apache.hadoop.mapred.TaskAttemptID, int, boolean, org.apache.hadoop.mapred.TaskCompletionEvent.Status, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructor. eventId should be created externally and incremented
per event for each job.
@param eventId event id, event id should be unique and assigned
incrementally, starting from 0.
@param taskId task id
@param status task's status
@param taskTrackerHttp task tracker's host:port for http.]]>
</doc>
</constructor>
<method name="getEventId" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns event Id.
@return event id]]>
</doc>
</method>
<method name="getTaskId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #getTaskAttemptId()} instead.">
<doc>
<![CDATA[Returns task id.
@return task id
@deprecated use {@link #getTaskAttemptId()} instead.]]>
</doc>
</method>
<method name="getTaskAttemptId" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns task id.
@return task id]]>
</doc>
</method>
<method name="getTaskStatus" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns enum Status.SUCESS or Status.FAILURE.
@return task tracker status]]>
</doc>
</method>
<method name="getTaskTrackerHttp" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[http location of the tasktracker where this task ran.
@return http location of tasktracker user logs]]>
</doc>
</method>
<method name="getTaskRunTime" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns time (in millisec) the task took to complete.]]>
</doc>
</method>
<method name="setTaskRunTime"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskCompletionTime" type="int"/>
<doc>
<![CDATA[Set the task completion time
@param taskCompletionTime time (in millisec) the task took to complete]]>
</doc>
</method>
<method name="setEventId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="eventId" type="int"/>
<doc>
<![CDATA[Sets the event id. It should be assigned incrementally, starting from 0.
@param eventId]]>
</doc>
</method>
<method name="setTaskId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #setTaskID(TaskAttemptID)} instead.">
<param name="taskId" type="java.lang.String"/>
<doc>
<![CDATA[Sets task id.
@param taskId
@deprecated use {@link #setTaskID(TaskAttemptID)} instead.]]>
</doc>
</method>
<method name="setTaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[Sets task id.
@param taskId]]>
</doc>
</method>
<method name="setTaskStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"/>
<doc>
<![CDATA[Set task status.
@param status]]>
</doc>
</method>
<method name="setTaskTrackerHttp"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskTrackerHttp" type="java.lang.String"/>
<doc>
<![CDATA[Set task tracker http location.
@param taskTrackerHttp]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="isMapTask" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="idWithinJob" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="EMPTY_ARRAY" type="org.apache.hadoop.mapred.TaskCompletionEvent[]"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This is used to track task completion events on
job tracker.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent -->
<!-- start class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
<class name="TaskCompletionEvent.Status" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="FAILED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="KILLED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="SUCCEEDED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="OBSOLETE" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="TIPFAILED" type="org.apache.hadoop.mapred.TaskCompletionEvent.Status"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.TaskCompletionEvent.Status -->
<!-- start class org.apache.hadoop.mapred.TaskGraphServlet -->
<class name="TaskGraphServlet" extends="javax.servlet.http.HttpServlet"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskGraphServlet"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="doGet"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="request" type="javax.servlet.http.HttpServletRequest"/>
<param name="response" type="javax.servlet.http.HttpServletResponse"/>
<exception name="ServletException" type="javax.servlet.ServletException"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="width" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[width of the graph w/o margins]]>
</doc>
</field>
<field name="height" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[height of the graph w/o margins]]>
</doc>
</field>
<field name="ymargin" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[margin space on y axis]]>
</doc>
</field>
<field name="xmargin" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[margin space on x axis]]>
</doc>
</field>
<doc>
<![CDATA[The servlet that outputs svg graphics for map / reduce task
statuses]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskGraphServlet -->
<!-- start class org.apache.hadoop.mapred.TaskID -->
<class name="TaskID" extends="org.apache.hadoop.mapreduce.TaskID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskID object from given {@link JobID}.
@param jobId JobID that this tip belongs to
@param isMap whether the tip is a map
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID" type="java.lang.String, int, boolean, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskID object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="downgrade" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="old" type="org.apache.hadoop.mapreduce.TaskID"/>
<doc>
<![CDATA[Downgrade a new TaskID to an old one
@param old a new or old TaskID
@return either old or a new TaskID built to match old]]>
</doc>
</method>
<method name="read" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getTaskIDsPattern" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="jtIdentifier" type="java.lang.String"/>
<param name="jobId" type="java.lang.Integer"/>
<param name="isMap" type="java.lang.Boolean"/>
<param name="taskId" type="java.lang.Integer"/>
<doc>
<![CDATA[Returns a regex pattern which matches task IDs. Arguments can
be given null, in which case that part of the regex will be generic.
For example to obtain a regex matching <i>the first map task</i>
of <i>any jobtracker</i>, of <i>any job</i>, we would use :
<pre>
TaskID.getTaskIDsPattern(null, null, true, 1);
</pre>
which will return :
<pre> "task_[^_]*_[0-9]*_m_000001*" </pre>
@param jtIdentifier jobTracker identifier, or null
@param jobId job number, or null
@param isMap whether the tip is a map, or null
@param taskId taskId number, or null
@return a regex pattern matching TaskIDs]]>
</doc>
</method>
<method name="forName" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
</method>
<doc>
<![CDATA[TaskID represents the immutable and unique identifier for
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
execute the Map or Reduce Task, each of which is uniquely identified by
their TaskAttemptID.
TaskID consists of 3 parts. First part is the {@link JobID}, that this
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
representing whether the task is a map task or a reduce task.
And the third part is the task number. <br>
An example TaskID is :
<code>task_200707121733_0003_m_000005</code> , which represents the
fifth map task in the third job running at the jobtracker
started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskID strings,
but rather use appropriate constructors or the {@link #forName(String)}
method.
@see JobID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskID -->
<!-- start class org.apache.hadoop.mapred.TaskLog -->
<class name="TaskLog" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskLog"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getTaskLogFile" return="java.io.File"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="filter" type="org.apache.hadoop.mapred.TaskLog.LogName"/>
</method>
<method name="getRealTaskLogFileLocation" return="java.io.File"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="filter" type="org.apache.hadoop.mapred.TaskLog.LogName"/>
</method>
<method name="getIndexFile" return="java.io.File"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="java.lang.String"/>
</method>
<method name="getIndexFile" return="java.io.File"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="java.lang.String"/>
<param name="isCleanup" type="boolean"/>
</method>
<method name="syncLogs"
abstract="false" native="false" synchronized="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="firstTaskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="syncLogs"
abstract="false" native="false" synchronized="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="firstTaskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="isCleanup" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="cleanup"
abstract="false" native="false" synchronized="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="logsRetainHours" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Purge old user logs.
@throws IOException]]>
</doc>
</method>
<method name="getTaskLogLength" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the desired maximum length of task's logs.
@param conf the job to look in
@return the number of bytes to cap the log files at]]>
</doc>
</method>
<method name="captureOutAndError" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="cmd" type="java.util.List"/>
<param name="stdoutFilename" type="java.io.File"/>
<param name="stderrFilename" type="java.io.File"/>
<param name="tailLength" type="long"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Wrap a command in a shell to capture stdout and stderr to files.
If the tailLength is 0, the entire output will be saved.
@param cmd The command and the arguments that should be run
@param stdoutFilename The filename that stdout should be saved to
@param stderrFilename The filename that stderr should be saved to
@param tailLength The length of the tail to be saved.
@return the modified command that should be run]]>
</doc>
</method>
<method name="captureOutAndError" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="setup" type="java.util.List"/>
<param name="cmd" type="java.util.List"/>
<param name="stdoutFilename" type="java.io.File"/>
<param name="stderrFilename" type="java.io.File"/>
<param name="tailLength" type="long"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Wrap a command in a shell to capture stdout and stderr to files.
Setup commands such as setting memory limit can be passed which
will be executed before exec.
If the tailLength is 0, the entire output will be saved.
@param setup The setup commands for the execed process.
@param cmd The command and the arguments that should be run
@param stdoutFilename The filename that stdout should be saved to
@param stderrFilename The filename that stderr should be saved to
@param tailLength The length of the tail to be saved.
@return the modified command that should be run]]>
</doc>
</method>
<method name="captureOutAndError" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="setup" type="java.util.List"/>
<param name="cmd" type="java.util.List"/>
<param name="stdoutFilename" type="java.io.File"/>
<param name="stderrFilename" type="java.io.File"/>
<param name="tailLength" type="long"/>
<param name="pidFileName" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Wrap a command in a shell to capture stdout and stderr to files.
Setup commands such as setting memory limit can be passed which
will be executed before exec.
If the tailLength is 0, the entire output will be saved.
@param setup The setup commands for the execed process.
@param cmd The command and the arguments that should be run
@param stdoutFilename The filename that stdout should be saved to
@param stderrFilename The filename that stderr should be saved to
@param tailLength The length of the tail to be saved.
@param pidFileName The name of the pid-file
@return the modified command that should be run]]>
</doc>
</method>
<method name="addCommand" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="cmd" type="java.util.List"/>
<param name="isExecutable" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add quotes to each of the command strings and
return as a single string
@param cmd The command to be quoted
@param isExecutable makes shell path if the first
argument is executable
@return the quoted string.
@throws IOException]]>
</doc>
</method>
<method name="captureDebugOut" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="cmd" type="java.util.List"/>
<param name="debugoutFilename" type="java.io.File"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Wrap a command in a shell to capture debug script's
stdout and stderr to debugout.
@param cmd The command and the arguments that should be run
@param debugoutFilename The filename that stdout and stderr
should be saved to.
@return the modified command that should be run
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[A simple logger to handle the task-specific user logs.
This class uses the system property <code>hadoop.log.dir</code>.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskLog -->
<!-- start class org.apache.hadoop.mapred.TaskLog.LogName -->
<class name="TaskLog.LogName" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.TaskLog.LogName[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.TaskLog.LogName"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<field name="STDOUT" type="org.apache.hadoop.mapred.TaskLog.LogName"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Log on the stdout of the task.]]>
</doc>
</field>
<field name="STDERR" type="org.apache.hadoop.mapred.TaskLog.LogName"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Log on the stderr of the task.]]>
</doc>
</field>
<field name="SYSLOG" type="org.apache.hadoop.mapred.TaskLog.LogName"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Log on the map-reduce system logs of the task.]]>
</doc>
</field>
<field name="PROFILE" type="org.apache.hadoop.mapred.TaskLog.LogName"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The java profiler information.]]>
</doc>
</field>
<field name="DEBUGOUT" type="org.apache.hadoop.mapred.TaskLog.LogName"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Log the debug script's stdout]]>
</doc>
</field>
<doc>
<![CDATA[The filter for userlogs.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskLog.LogName -->
<!-- start class org.apache.hadoop.mapred.TaskLogAppender -->
<class name="TaskLogAppender" extends="org.apache.log4j.FileAppender"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskLogAppender"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="activateOptions"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="append"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="event" type="org.apache.log4j.spi.LoggingEvent"/>
</method>
<method name="flush"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getTaskId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Getter/Setter methods for log4j.]]>
</doc>
</method>
<method name="setTaskId"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="java.lang.String"/>
</method>
<method name="getTotalLogFileSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setTotalLogFileSize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="logSize" type="long"/>
</method>
<doc>
<![CDATA[A simple log4j-appender for the task child's
map-reduce system logs.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskLogAppender -->
<!-- start class org.apache.hadoop.mapred.TaskLogServlet -->
<class name="TaskLogServlet" extends="javax.servlet.http.HttpServlet"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskLogServlet"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getTaskLogUrl" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskTrackerHostName" type="java.lang.String"/>
<param name="httpPort" type="java.lang.String"/>
<param name="taskAttemptID" type="java.lang.String"/>
<doc>
<![CDATA[Construct the taskLogUrl
@param taskTrackerHostName
@param httpPort
@param taskAttemptID
@return the taskLogUrl]]>
</doc>
</method>
<method name="doGet"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="request" type="javax.servlet.http.HttpServletRequest"/>
<param name="response" type="javax.servlet.http.HttpServletResponse"/>
<exception name="ServletException" type="javax.servlet.ServletException"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the logs via http.]]>
</doc>
</method>
<doc>
<![CDATA[A servlet that is run by the TaskTrackers to provide the task logs via http.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskLogServlet -->
<!-- start class org.apache.hadoop.mapred.TaskReport -->
<class name="TaskReport" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="TaskReport"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getTaskId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #getTaskID()} instead">
<doc>
<![CDATA[@deprecated use {@link #getTaskID()} instead]]>
</doc>
</method>
<method name="getTaskID" return="org.apache.hadoop.mapred.TaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The id of the task.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The amount completed, between zero and one.]]>
</doc>
</method>
<method name="getState" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The most recent state, reported by a {@link Reporter}.]]>
</doc>
</method>
<method name="getDiagnostics" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A list of error messages.]]>
</doc>
</method>
<method name="getCounters" return="org.apache.hadoop.mapred.Counters"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[A table of counters.]]>
</doc>
</method>
<method name="getCurrentStatus" return="org.apache.hadoop.mapred.TIPStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The current status]]>
</doc>
</method>
<method name="getFinishTime" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get finish time of task.
@return 0, if finish time was not set else returns finish time.]]>
</doc>
</method>
<method name="getStartTime" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get start time of task.
@return 0 if start time was not set, else start time.]]>
</doc>
</method>
<method name="setSuccessfulAttempt"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="t" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[set successful attempt ID of the task.]]>
</doc>
</method>
<method name="getSuccessfulTaskAttempt" return="org.apache.hadoop.mapred.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the attempt ID that took this task to completion]]>
</doc>
</method>
<method name="setRunningTaskAttempts"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="runningAttempts" type="java.util.Collection"/>
<doc>
<![CDATA[set running attempt(s) of the task.]]>
</doc>
</method>
<method name="getRunningTaskAttempts" return="java.util.Collection"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the running task attempt IDs for this task]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A report on the state of a task.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskReport -->
<!-- start class org.apache.hadoop.mapred.TaskTracker -->
<class name="TaskTracker" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.MRConstants"/>
<implements name="org.apache.hadoop.mapred.TaskUmbilicalProtocol"/>
<implements name="java.lang.Runnable"/>
<constructor name="TaskTracker" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Start with the local machine name, and the default JobTracker]]>
</doc>
</constructor>
<method name="getTaskTrackerInstrumentation" return="org.apache.hadoop.mapred.TaskTrackerInstrumentation"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getProtocolVersion" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="protocol" type="java.lang.String"/>
<param name="clientVersion" type="long"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getInstrumentationClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="setInstrumentationClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="t" type="java.lang.Class"/>
</method>
<method name="cleanupStorage"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Removes all contents of temporary storage. Called upon
startup, to remove any leftovers from previous run.]]>
</doc>
</method>
<method name="shutdown"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close down the TaskTracker and all its components. We must also shutdown
any running tasks or threads, and cleanup disk space. A new TaskTracker
within the same process space might be restarted, so everything must be
clean.]]>
</doc>
</method>
<method name="getJobClient" return="org.apache.hadoop.mapred.InterTrackerProtocol"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The connection to the JobTracker, used by the TaskRunner
for locating remote files.]]>
</doc>
</method>
<method name="getTaskTrackerReportAddress" return="java.net.InetSocketAddress"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the address (host and port) to which the tasktracker is bound]]>
</doc>
</method>
<method name="getJvmManagerInstance" return="org.apache.hadoop.mapred.JvmManager"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The server retry loop.
This while-loop attempts to connect to the JobTracker. It only
loops when the old TaskTracker has gone bad (its state is
stale somehow) and we need to reinitialize everything.]]>
</doc>
</method>
<method name="getTask" return="org.apache.hadoop.mapred.JvmTask"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jvmId" type="org.apache.hadoop.mapred.JVMId"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Called upon startup by the child process, to fetch Task data.]]>
</doc>
</method>
<method name="statusUpdate" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="taskStatus" type="org.apache.hadoop.mapred.TaskStatus"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Called periodically to report Task progress, from 0.0 to 1.0.]]>
</doc>
</method>
<method name="reportDiagnosticInfo"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="info" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Called when the task dies before completion, and we want to report back
diagnostic info]]>
</doc>
</method>
<method name="reportNextRecordRange"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="range" type="org.apache.hadoop.mapred.SortedRanges.Range"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="ping" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Child checking to see if we're alive. Normally does nothing.]]>
</doc>
</method>
<method name="commitPending"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="taskStatus" type="org.apache.hadoop.mapred.TaskStatus"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Task is reporting that it is in commit_pending
and it is waiting for the commit Response]]>
</doc>
</method>
<method name="canCommit" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<doc>
<![CDATA[Child checking whether it can commit]]>
</doc>
</method>
<method name="done"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The task is done.]]>
</doc>
</method>
<method name="shuffleError"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="message" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[A reduce-task failed to shuffle the map-outputs. Kill the task.]]>
</doc>
</method>
<method name="fsError"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="message" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[A child task had a local filesystem error. Kill the task.]]>
</doc>
</method>
<method name="getMapCompletionEvents" return="org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobId" type="org.apache.hadoop.mapred.JobID"/>
<param name="fromEventId" type="int"/>
<param name="maxLocs" type="int"/>
<param name="id" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="mapOutputLost"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskid" type="org.apache.hadoop.mapred.TaskAttemptID"/>
<param name="errorMsg" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[A completed map task's output has been lost.]]>
</doc>
</method>
<method name="isIdle" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Is this task tracker idle?
@return has this task tracker finished and cleaned up all of its tasks?]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="argv" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<doc>
<![CDATA[Start the TaskTracker, point toward the indicated JobTracker]]>
</doc>
</method>
<method name="isTaskMemoryManagerEnabled" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Is the TaskMemoryManager Enabled on this system?
@return true if enabled, false otherwise.]]>
</doc>
</method>
<method name="getTaskMemoryManager" return="org.apache.hadoop.mapred.TaskMemoryManagerThread"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="MR_CLIENTTRACE_FORMAT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="ClientTraceLog" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[TaskTracker is a process that starts and tracks MR Tasks
in a networked environment. It contacts the JobTracker
for Task assignments and reporting results.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskTracker -->
<!-- start class org.apache.hadoop.mapred.TaskTracker.MapOutputServlet -->
<class name="TaskTracker.MapOutputServlet" extends="javax.servlet.http.HttpServlet"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskTracker.MapOutputServlet"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="doGet"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="request" type="javax.servlet.http.HttpServletRequest"/>
<param name="response" type="javax.servlet.http.HttpServletResponse"/>
<exception name="ServletException" type="javax.servlet.ServletException"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class is used in TaskTracker's Jetty to serve the map outputs
to other nodes.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TaskTracker.MapOutputServlet -->
<!-- start class org.apache.hadoop.mapred.TextInputFormat -->
<class name="TextInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat}
instead.">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="TextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
Either linefeed or carriage-return is used to signal end of line. Keys are
the position in the file, and values are the line of text.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.input.TextInputFormat}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TextInputFormat -->
<!-- start class org.apache.hadoop.mapred.TextOutputFormat -->
<class name="TextOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use
{@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead.">
<constructor name="TextOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes plain text files.
@deprecated Use
{@link org.apache.hadoop.mapreduce.lib.output.TextOutputFormat} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TextOutputFormat -->
<!-- start class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter -->
<class name="TextOutputFormat.LineRecordWriter" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordWriter"/>
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="out" type="java.io.DataOutputStream"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.TextOutputFormat.LineRecordWriter -->
<!-- start class org.apache.hadoop.mapred.TIPStatus -->
<class name="TIPStatus" extends="java.lang.Enum"
abstract="false"
static="false" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.TIPStatus[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.TIPStatus"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="PENDING" type="org.apache.hadoop.mapred.TIPStatus"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RUNNING" type="org.apache.hadoop.mapred.TIPStatus"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="COMPLETE" type="org.apache.hadoop.mapred.TIPStatus"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="KILLED" type="org.apache.hadoop.mapred.TIPStatus"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED" type="org.apache.hadoop.mapred.TIPStatus"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[The states of a {@link TaskInProgress} as seen by the JobTracker.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.TIPStatus -->
</package>
<package name="org.apache.hadoop.mapred.jobcontrol">
<!-- start class org.apache.hadoop.mapred.jobcontrol.Job -->
<class name="Job" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf, java.util.ArrayList"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Construct a job.
@param jobConf a mapred job configuration representing a job to be executed.
@param dependingJobs a list of jobs the current job depends on]]>
</doc>
</constructor>
<constructor name="Job" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Construct a job.
@param jobConf mapred job configuration representing a job to be executed.
@throws IOException]]>
</doc>
</constructor>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the job name of this job]]>
</doc>
</method>
<method name="setJobName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobName" type="java.lang.String"/>
<doc>
<![CDATA[Set the job name for this job.
@param jobName the job name]]>
</doc>
</method>
<method name="getJobID" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the job ID of this job assigned by JobControl]]>
</doc>
</method>
<method name="setJobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="id" type="java.lang.String"/>
<doc>
<![CDATA[Set the job ID for this job.
@param id the job ID]]>
</doc>
</method>
<method name="getMapredJobID" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #getAssignedJobID()} instead">
<doc>
<![CDATA[@return the mapred ID of this job
@deprecated use {@link #getAssignedJobID()} instead]]>
</doc>
</method>
<method name="setMapredJobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="use {@link #setAssignedJobID(JobID)} instead">
<param name="mapredJobID" type="java.lang.String"/>
<doc>
<![CDATA[Set the mapred ID for this job.
@param mapredJobID the mapred job ID for this job.
@deprecated use {@link #setAssignedJobID(JobID)} instead]]>
</doc>
</method>
<method name="getAssignedJobID" return="org.apache.hadoop.mapred.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the mapred ID of this job as assigned by the
mapred framework.]]>
</doc>
</method>
<method name="setAssignedJobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="mapredJobID" type="org.apache.hadoop.mapred.JobID"/>
<doc>
<![CDATA[Set the mapred ID for this job as assigned by the
mapred framework.
@param mapredJobID the mapred job ID for this job.]]>
</doc>
</method>
<method name="getJobConf" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the mapred job conf of this job]]>
</doc>
</method>
<method name="setJobConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Set the mapred job conf for this job.
@param jobConf the mapred job conf for this job.]]>
</doc>
</method>
<method name="getState" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the state of this job]]>
</doc>
</method>
<method name="setState"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="state" type="int"/>
<doc>
<![CDATA[Set the state for this job.
@param state the new state for this job.]]>
</doc>
</method>
<method name="getMessage" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the message of this job]]>
</doc>
</method>
<method name="setMessage"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="message" type="java.lang.String"/>
<doc>
<![CDATA[Set the message for this job.
@param message the message for this job.]]>
</doc>
</method>
<method name="getJobClient" return="org.apache.hadoop.mapred.JobClient"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the job client of this job]]>
</doc>
</method>
<method name="getDependingJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the depending jobs of this job]]>
</doc>
</method>
<method name="addDependingJob" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dependingJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/>
<doc>
<![CDATA[Add a job to this job's dependency list. Dependent jobs can only be added while a Job
is waiting to run, not during or afterwards.
@param dependingJob Job that this Job depends on.
@return <tt>true</tt> if the Job was added.]]>
</doc>
</method>
<method name="isCompleted" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return true if this job is in a complete state]]>
</doc>
</method>
<method name="isReady" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return true if this job is in READY state]]>
</doc>
</method>
<method name="submit"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Submit this job to mapred. The state becomes RUNNING if submission
is successful, FAILED otherwise.]]>
</doc>
</method>
<field name="SUCCESS" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="WAITING" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RUNNING" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="READY" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="FAILED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DEPENDENT_FAILED" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class encapsulates a MapReduce job and its dependency. It monitors
the states of the depending jobs and updates the state of this job.
A job starts in the WAITING state. If it does not have any depending jobs, or
all of the depending jobs are in SUCCESS state, then the job state will become
READY. If any depending jobs fail, the job will fail too.
When in READY state, the job can be submitted to Hadoop for execution, with
the state changing into RUNNING state. From RUNNING state, the job can get into
SUCCESS or FAILED state, depending on the status of the job execution.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.jobcontrol.Job -->
<!-- start class org.apache.hadoop.mapred.jobcontrol.JobControl -->
<class name="JobControl" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.Runnable"/>
<constructor name="JobControl" type="java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Construct a job control for a group of jobs.
@param groupName a name identifying this group]]>
</doc>
</constructor>
<method name="getWaitingJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the waiting state]]>
</doc>
</method>
<method name="getRunningJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the running state]]>
</doc>
</method>
<method name="getReadyJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the ready state]]>
</doc>
</method>
<method name="getSuccessfulJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the jobs in the success state]]>
</doc>
</method>
<method name="getFailedJobs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="addJob" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="aJob" type="org.apache.hadoop.mapred.jobcontrol.Job"/>
<doc>
<![CDATA[Add a new job.
@param aJob the new job]]>
</doc>
</method>
<method name="addJobs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobs" type="java.util.Collection"/>
<doc>
<![CDATA[Add a collection of jobs.
@param jobs the jobs to add]]>
</doc>
</method>
<method name="getState" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the thread state]]>
</doc>
</method>
<method name="stop"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[set the thread state to STOPPING so that the
thread will stop when it wakes up.]]>
</doc>
</method>
<method name="suspend"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[suspend the running thread]]>
</doc>
</method>
<method name="resume"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[resume the suspended thread]]>
</doc>
</method>
<method name="allFinished" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The main loop for the thread.
The loop does the following:
Check the states of the running jobs
Update the states of waiting jobs
Submit the jobs in ready state]]>
</doc>
</method>
<doc>
<![CDATA[This class encapsulates a set of MapReduce jobs and their dependencies. It tracks
the states of the jobs by placing them into different tables according to their
states.
This class provides APIs for the client app to add a job to the group and to get
the jobs in the group in different states. When a
job is added, an ID unique to the group is assigned to the job.
This class has a thread that submits jobs when they become ready, monitors the
states of the running jobs, and updates the states of jobs based on the state changes
of the jobs they depend on. The class provides APIs for suspending/resuming
the thread, and for stopping the thread.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.jobcontrol.JobControl -->
</package>
<package name="org.apache.hadoop.mapred.join">
<!-- start class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
<class name="ArrayListBackedIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
<constructor name="ArrayListBackedIterator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="ArrayListBackedIterator" type="java.util.ArrayList"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[This class provides an implementation of ResetableIterator. The
implementation uses an {@link java.util.ArrayList} to store elements
added to it, replaying them as requested.
Prefer {@link StreamBackedIterator}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.ArrayListBackedIterator -->
<!-- start interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
<interface name="ComposableInputFormat" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Refinement of InputFormat requiring implementors to provide
ComposableRecordReader instead of RecordReader.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.join.ComposableInputFormat -->
<!-- start interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
<interface name="ComposableRecordReader" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<implements name="java.lang.Comparable"/>
<method name="id" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the position in the collector this class occupies.]]>
</doc>
</method>
<method name="key" return="org.apache.hadoop.io.WritableComparable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key this RecordReader would supply on a call to next(K,V)]]>
</doc>
</method>
<method name="key"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the head of this RecordReader into the object provided.]]>
</doc>
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns true if the stream is not empty, but provides no guarantee that
a call to next(K,V) will succeed.]]>
</doc>
</method>
<method name="skip"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
</doc>
</method>
<method name="accept"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[While key-value pairs from this RecordReader match the given key, register
them with the JoinCollector provided.]]>
</doc>
</method>
<doc>
<![CDATA[Additional operations required of a RecordReader to participate in a join.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.join.ComposableRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.CompositeInputFormat -->
<class name="CompositeInputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
<constructor name="CompositeInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setFormat"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Interpret a given string as a composite expression.
{@code
func ::= <ident>([<func>,]*<func>)
func ::= tbl(<class>,"<path>")
class ::= @see java.lang.Class#forName(java.lang.String)
path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String)
}
Reads expression from the <tt>mapred.join.expr</tt> property and
user-supplied join types from <tt>mapred.join.define.&lt;ident&gt;</tt>
types. Paths supplied to <tt>tbl</tt> are given as input paths to the
InputFormat class listed.
@see #compose(java.lang.String, java.lang.Class, java.lang.String...)]]>
</doc>
</method>
<method name="addDefaults"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Adds the default set of identifiers to the parser.]]>
</doc>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Build a CompositeInputSplit from the child InputFormats by assigning the
ith split from each child to the ith composite split.]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.join.ComposableRecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Construct a CompositeRecordReader for the children of this InputFormat
as defined in the init expression.
The outermost join need only be composable, not necessarily a composite.
Mandating TupleWritable isn't strictly correct.]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="inf" type="java.lang.Class"/>
<param name="path" type="java.lang.String"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given InputFormat class (inf), path (p) return:
{@code tbl(<inf>, <p>) }]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="op" type="java.lang.String"/>
<param name="inf" type="java.lang.Class"/>
<param name="path" type="java.lang.String[]"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given operation (op), Object class (inf), set of paths (p) return:
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
</doc>
</method>
<method name="compose" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="op" type="java.lang.String"/>
<param name="inf" type="java.lang.Class"/>
<param name="path" type="org.apache.hadoop.fs.Path[]"/>
<doc>
<![CDATA[Convenience method for constructing composite formats.
Given operation (op), Object class (inf), set of paths (p) return:
{@code <op>(tbl(<inf>,<p1>),tbl(<inf>,<p2>),...,tbl(<inf>,<pn>)) }]]>
</doc>
</method>
<doc>
<![CDATA[An InputFormat capable of performing joins over a set of data sources sorted
and partitioned the same way.
@see #setFormat
A user may define new join types by setting the property
<tt>mapred.join.define.&lt;ident&gt;</tt> to a classname. In the expression
<tt>mapred.join.expr</tt>, the identifier will be assumed to be a
ComposableRecordReader.
<tt>mapred.join.keycomparator</tt> can be a classname used to compare keys
in the join.
@see JoinRecordReader
@see MultiFilterRecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.CompositeInputFormat -->
<!-- start class org.apache.hadoop.mapred.join.CompositeInputSplit -->
<class name="CompositeInputSplit" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<constructor name="CompositeInputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CompositeInputSplit" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="s" type="org.apache.hadoop.mapred.InputSplit"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add an InputSplit to this collection.
@throws IOException If capacity was not specified during construction
or if capacity has been reached.]]>
</doc>
</method>
<method name="get" return="org.apache.hadoop.mapred.InputSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Get ith child InputSplit.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the aggregate length of all child InputSplits currently added.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the length of ith child InputSplit.]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Collect a set of hosts from all child InputSplits.]]>
</doc>
</method>
<method name="getLocation" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[getLocations from ith InputSplit.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write splits in the following format.
{@code
<count><class1><class2>...<classn><split1><split2>...<splitn>
}]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}
@throws IOException If the child InputSplit cannot be read, typically
for failing access checks.]]>
</doc>
</method>
<doc>
<![CDATA[This InputSplit contains a set of child InputSplits. Any InputSplit inserted
into this collection must have a public default constructor.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.CompositeInputSplit -->
<!-- start class org.apache.hadoop.mapred.join.CompositeRecordReader -->
<class name="CompositeRecordReader" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.conf.Configurable"/>
<constructor name="CompositeRecordReader" type="int, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a RecordReader with <tt>capacity</tt> children to position
<tt>id</tt> in the parent reader.
The id of a root CompositeRecordReader is -1 by convention, but relying
on this is not recommended.]]>
</doc>
</constructor>
<method name="combine" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
</method>
<method name="id" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the position in the collector this class occupies.]]>
</doc>
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getRecordReaderQueue" return="java.util.PriorityQueue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return sorted list of RecordReaders for this composite.]]>
</doc>
</method>
<method name="getComparator" return="org.apache.hadoop.io.WritableComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return comparator defining the ordering for RecordReaders in this
composite.]]>
</doc>
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rr" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add a RecordReader to this collection.
The id() of a RecordReader determines where in the Tuple its
entry will appear. Adding RecordReaders with the same id has
undefined behavior.]]>
</doc>
</method>
<method name="key" return="org.apache.hadoop.io.WritableComparable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key for the current join or the value at the top of the
RecordReader heap.]]>
</doc>
</method>
<method name="key"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the top of this RR into the given object.]]>
</doc>
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return true if it is possible that this could emit more values.]]>
</doc>
</method>
<method name="skip"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Pass skip key to child RRs.]]>
</doc>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Obtain an iterator over the child RRs apropos of the value type
ultimately emitted from this join.]]>
</doc>
</method>
<method name="accept"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[If key provided matches that of this Composite, give JoinCollector
iterator over values it may emit.]]>
</doc>
</method>
<method name="fillJoinCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="iterkey" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For all child RRs offering the key provided, obtain an iterator
at that position in the JoinCollector.]]>
</doc>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<doc>
<![CDATA[Implement Comparable contract (compare key of join or head of heap
with that of another).]]>
</doc>
</method>
<method name="createKey" return="org.apache.hadoop.io.WritableComparable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new key value common to all child RRs.
@throws ClassCastException if key classes differ.]]>
</doc>
</method>
<method name="createInternalValue" return="org.apache.hadoop.mapred.join.TupleWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Create a value to be used internally for joins.]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Unsupported (returns zero in all cases).]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close all child RRs.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Report progress as the minimum of all child RR progress.]]>
</doc>
</method>
<field name="jc" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"
transient="false" volatile="false"
static="false" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="kids" type="org.apache.hadoop.mapred.join.ComposableRecordReader[]"
transient="false" volatile="false"
static="false" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A RecordReader that can effect joins of RecordReaders sharing a common key
type and partitioning.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.CompositeRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
<class name="InnerJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Return true iff the tuple is full (all data sources contain this key).]]>
</doc>
</method>
<doc>
<![CDATA[Full inner join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.InnerJoinRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.JoinRecordReader -->
<class name="JoinRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<constructor name="JoinRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<param name="value" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Emit the next set of key, value pairs as defined by the child
RecordReaders and operation associated with this composite RR.]]>
</doc>
</method>
<method name="createValue" return="org.apache.hadoop.mapred.join.TupleWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator wrapping the JoinCollector.]]>
</doc>
</method>
<doc>
<![CDATA[Base class for Composite joins returning Tuples of arbitrary Writables.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.JoinRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator -->
<class name="JoinRecordReader.JoinDelegationIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
<constructor name="JoinRecordReader.JoinDelegationIterator"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Since the JoinCollector is effecting our operation, we need only
provide an iterator proxy wrapping its operation.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.JoinRecordReader.JoinDelegationIterator -->
<!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
<class name="MultiFilterRecordReader" extends="org.apache.hadoop.mapred.join.CompositeRecordReader"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<constructor name="MultiFilterRecordReader" type="int, org.apache.hadoop.mapred.JobConf, int, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="emit" return="org.apache.hadoop.io.Writable"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For each tuple emitted, return a value (typically one of the values
in the tuple).
Modifying the Writables in the tuple is permitted and unlikely to affect
join behavior in most cases, but it is not recommended. It's safer to
clone first.]]>
</doc>
</method>
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Default implementation offers {@link #emit} every Tuple from the
collector (the outer join of child RRs).]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<param name="value" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="createValue" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getDelegate" return="org.apache.hadoop.mapred.join.ResetableIterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator returning a single value from the tuple.
@see MultiFilterDelegationIterator]]>
</doc>
</method>
<doc>
<![CDATA[Base class for Composite join returning values derived from multiple
sources, but generally not tuples.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
<class name="MultiFilterRecordReader.MultiFilterDelegationIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
<constructor name="MultiFilterRecordReader.MultiFilterDelegationIterator"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Proxy the JoinCollector, but include callback to emit.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.MultiFilterRecordReader.MultiFilterDelegationIterator -->
<!-- start class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
<class name="OuterJoinRecordReader" extends="org.apache.hadoop.mapred.join.JoinRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="combine" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="srcs" type="java.lang.Object[]"/>
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Emit everything from the collector.]]>
</doc>
</method>
<doc>
<![CDATA[Full outer join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.OuterJoinRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.OverrideRecordReader -->
<class name="OverrideRecordReader" extends="org.apache.hadoop.mapred.join.MultiFilterRecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="emit" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="dst" type="org.apache.hadoop.mapred.join.TupleWritable"/>
<doc>
<![CDATA[Emit the value with the highest position in the tuple.]]>
</doc>
</method>
<method name="fillJoinCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="iterkey" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Instead of filling the JoinCollector with iterators from all
data sources, fill only the rightmost for this key.
This not only saves space by discarding the other sources, but
it also emits the number of key-value pairs in the preferred
RecordReader instead of repeating that stream n times, where
n is the cardinality of the cross product of the discarded
streams for the given key.]]>
</doc>
</method>
<doc>
<![CDATA[Prefer the &quot;rightmost&quot; data source for this key.
For example, <tt>override(S1,S2,S3)</tt> will prefer values
from S3 over S2, and values from S2 over S1 for all keys
emitted from all sources.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.OverrideRecordReader -->
<!-- start class org.apache.hadoop.mapred.join.Parser -->
<class name="Parser" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Parser"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<doc>
<![CDATA[Very simple shift-reduce parser for join expressions.
This should be sufficient for the user extension permitted now, but ought to
be replaced with a parser generator if more complex grammars are supported.
In particular, this &quot;shift-reduce&quot; parser has no states. Each set
of formals requires a different internal node type, which is responsible for
interpreting the list of tokens it receives. This is sufficient for the
current grammar, but it has several annoying properties that might inhibit
extension. In particular, parentheses are always function calls; an
algebraic or filter grammar would not only require a node type, but must
also work around the internals of this parser.
For most other cases, adding classes to the hierarchy- particularly by
extending JoinRecordReader and MultiFilterRecordReader- is fairly
straightforward. One need only override the relevant method(s) (usually only
{@link CompositeRecordReader#combine}) and include a property to map its
value to an identifier in the parser.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser -->
<!-- start class org.apache.hadoop.mapred.join.Parser.Node -->
<class name="Parser.Node" extends="java.lang.Object"
abstract="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableInputFormat"/>
<constructor name="Parser.Node" type="java.lang.String"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="addIdentifier"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<param name="ident" type="java.lang.String"/>
<param name="mcstrSig" type="java.lang.Class[]"/>
<param name="nodetype" type="java.lang.Class"/>
<param name="cl" type="java.lang.Class"/>
<exception name="NoSuchMethodException" type="java.lang.NoSuchMethodException"/>
<doc>
<![CDATA[For a given identifier, add a mapping to the nodetype for the parse
tree and to the ComposableRecordReader to be created, including the
formals required to invoke the constructor.
The nodetype and constructor signature should be filled in from the
child node.]]>
</doc>
</method>
<method name="setID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="id" type="int"/>
</method>
<method name="setKeyComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="cmpcl" type="java.lang.Class"/>
</method>
<field name="rrCstrMap" type="java.util.Map"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="id" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="ident" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="cmpcl" type="java.lang.Class"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.Node -->
<!-- start class org.apache.hadoop.mapred.join.Parser.NodeToken -->
<class name="Parser.NodeToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.NodeToken -->
<!-- start class org.apache.hadoop.mapred.join.Parser.NumToken -->
<class name="Parser.NumToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Parser.NumToken" type="double"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getNum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.NumToken -->
<!-- start class org.apache.hadoop.mapred.join.Parser.StrToken -->
<class name="Parser.StrToken" extends="org.apache.hadoop.mapred.join.Parser.Token"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Parser.StrToken" type="org.apache.hadoop.mapred.join.Parser.TType, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getStr" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.StrToken -->
<!-- start class org.apache.hadoop.mapred.join.Parser.Token -->
<class name="Parser.Token" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="getType" return="org.apache.hadoop.mapred.join.Parser.TType"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getNode" return="org.apache.hadoop.mapred.join.Parser.Node"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getNum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getStr" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Tagged-union type for tokens from the join expression.
@see Parser.TType]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.Token -->
<!-- start class org.apache.hadoop.mapred.join.Parser.TType -->
<class name="Parser.TType" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapred.join.Parser.TType[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapred.join.Parser.TType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="CIF" type="org.apache.hadoop.mapred.join.Parser.TType"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="IDENT" type="org.apache.hadoop.mapred.join.Parser.TType"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="COMMA" type="org.apache.hadoop.mapred.join.Parser.TType"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LPAREN" type="org.apache.hadoop.mapred.join.Parser.TType"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RPAREN" type="org.apache.hadoop.mapred.join.Parser.TType"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="QUOT" type="org.apache.hadoop.mapred.join.Parser.TType"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="NUM" type="org.apache.hadoop.mapred.join.Parser.TType"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapred.join.Parser.TType -->
<!-- start interface org.apache.hadoop.mapred.join.ResetableIterator -->
<interface name="ResetableIterator" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[True if a call to next may return a value. False positives are
permitted, but not false negatives.]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Assign next value to actual.
It is required that elements added to a ResetableIterator be returned in
the same order after a call to {@link #reset} (FIFO).
Note that a call to this may fail for nested joins (i.e. more elements
available, but none satisfying the constraints of the join)]]>
</doc>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Assign last value returned to actual.]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Set iterator to return to the start of its range. Must be called after
calling {@link #add} to avoid a ConcurrentModificationException.]]>
</doc>
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add an element to the collection of elements to iterate over.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close datasources and release resources. Calling methods on the iterator
after calling close has undefined behavior.]]>
</doc>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Close datasources, but do not release internal resources. Calling this
method should permit the object to be reused with a different datasource.]]>
</doc>
</method>
<doc>
<![CDATA[This defines an interface to a stateful Iterator that can replay elements
added to it directly.
Note that this does not extend {@link java.util.Iterator}.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.join.ResetableIterator -->
<!-- start class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY -->
<class name="ResetableIterator.EMPTY" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
<constructor name="ResetableIterator.EMPTY"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapred.join.ResetableIterator.EMPTY -->
<!-- start class org.apache.hadoop.mapred.join.StreamBackedIterator -->
<class name="StreamBackedIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ResetableIterator"/>
<constructor name="StreamBackedIterator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="replay" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="add"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="item" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="clear"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[This class provides an implementation of ResetableIterator. This
implementation uses a byte array to store elements added to it.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.StreamBackedIterator -->
<!-- start class org.apache.hadoop.mapred.join.TupleWritable -->
<class name="TupleWritable" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<constructor name="TupleWritable"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create an empty tuple with no allocated storage for writables.]]>
</doc>
</constructor>
<constructor name="TupleWritable" type="org.apache.hadoop.io.Writable[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Initialize tuple with storage; unknown whether any of them contain
&quot;written&quot; values.]]>
</doc>
</constructor>
<method name="has" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Return true if tuple has an element at the position provided.]]>
</doc>
</method>
<method name="get" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Get ith Writable from Tuple.]]>
</doc>
</method>
<method name="size" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The number of children in this Tuple.]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="java.lang.Object"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return an iterator over the elements in this tuple.
Note that this doesn't flatten the tuple; one may receive tuples
from this iterator.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Convert Tuple to String as in the following.
<tt>[<child1>,<child2>,...,<childn>]</tt>]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Writes each Writable to <code>out</code>.
TupleWritable format:
{@code
<count><type1><type2>...<typen><obj1><obj2>...<objn>
}]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<doc>
<![CDATA[Writable type storing multiple {@link org.apache.hadoop.io.Writable}s.
This is *not* a general-purpose tuple type. In almost all cases, users are
encouraged to implement their own serializable types, which can perform
better validation and provide more efficient encodings than this class is
capable of. TupleWritable relies on the join framework for type safety and
assumes its instances will rarely be persisted, assumptions not only
incompatible with, but contrary to the general case.
@see org.apache.hadoop.io.Writable]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.TupleWritable -->
<!-- start class org.apache.hadoop.mapred.join.WrappedRecordReader -->
<class name="WrappedRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<method name="id" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="key" return="org.apache.hadoop.io.WritableComparable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the key at the head of this RR.]]>
</doc>
</method>
<method name="key"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="qkey" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Clone the key at the head of this RR into the object supplied.]]>
</doc>
</method>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return true if the RR- including the k,v pair stored in this object-
is exhausted.]]>
</doc>
</method>
<method name="skip"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Skip key-value pairs with keys less than or equal to the key provided.]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read the next k,v pair into the head of this object; return true iff
the RR and this are exhausted.]]>
</doc>
</method>
<method name="accept"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="org.apache.hadoop.mapred.join.CompositeRecordReader.JoinCollector"/>
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add an iterator to the collector at the position occupied by this
RecordReader over the values in this stream paired with the key
provided (ie register a stream of values from this source matching K
with a collector).]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<param name="value" type="org.apache.hadoop.io.Writable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write key-value pair at the head of this stream to the objects provided;
get next key-value pair from proxied RR.]]>
</doc>
</method>
<method name="createKey" return="org.apache.hadoop.io.WritableComparable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Request new key from proxied RR.]]>
</doc>
</method>
<method name="createValue" return="org.apache.hadoop.io.Writable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Request new value from proxied RR.]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Request progress from proxied RR.]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Request position from proxied RR.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Forward close request to proxied RR.]]>
</doc>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapred.join.ComposableRecordReader"/>
<doc>
<![CDATA[Implement Comparable contract (compare key at head of proxied RR
with that of another).]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="java.lang.Object"/>
<doc>
<![CDATA[Return true iff compareTo(other) returns 0.]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[Proxy class for a RecordReader participating in the join framework.
This class keeps track of the &quot;head&quot; key-value pair for the
provided RecordReader and keeps a store of values matching a key when
this source is participating in a join.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.join.WrappedRecordReader -->
</package>
<package name="org.apache.hadoop.mapred.lib">
<!-- start class org.apache.hadoop.mapred.lib.ChainMapper -->
<class name="ChainMapper" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="ChainMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructor.]]>
</doc>
</constructor>
<method name="addMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="byValue" type="boolean"/>
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Adds a Mapper class to the chain job's JobConf.
<p/>
It has to be specified how key and values are passed from one element of
the chain to the next, by value or by reference. If a Mapper leverages the
assumed semantics that the key and values are not modified by the collector
'by value' must be used. If the Mapper does not expect this semantics, as
an optimization to avoid serialization and deserialization 'by reference'
can be used.
<p/>
For the added Mapper the configuration given for it,
<code>mapperConf</code>, has precedence over the job's JobConf. This
precedence is in effect when the task is running.
<p/>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain
<p/>
@param job job's JobConf to add the Mapper class.
@param klass the Mapper class to add.
@param inputKeyClass mapper input key class.
@param inputValueClass mapper input value class.
@param outputKeyClass mapper output key class.
@param outputValueClass mapper output value class.
@param byValue indicates if key/values should be passed by value
to the next Mapper in the chain, if any.
@param mapperConf a JobConf with the configuration for the Mapper
class. It is recommended to use a JobConf without default values using the
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Configures the ChainMapper and all the Mappers in the chain.
<p/>
If this method is overridden <code>super.configure(...)</code> should be
invoked at the beginning of the overriding method.]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Chains the <code>map(...)</code> methods of the Mappers in the chain.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Closes the ChainMapper and all the Mappers in the chain.
<p/>
If this method is overridden <code>super.close()</code> should be
invoked at the end of the overriding method.]]>
</doc>
</method>
<doc>
<![CDATA[The ChainMapper class allows the use of multiple Mapper classes within a single
Map task.
<p/>
The Mapper classes are invoked in a chained (or piped) fashion, the output of
the first becomes the input of the second, and so on until the last Mapper,
the output of the last Mapper will be written to the task's output.
<p/>
The key functionality of this feature is that the Mappers in the chain do not
need to be aware that they are executed in a chain. This enables having
reusable specialized Mappers that can be combined to perform composite
operations within a single task.
<p/>
Special care has to be taken when creating chains that the key/values output
by a Mapper are valid for the following Mapper in the chain. It is assumed
all Mappers and the Reduce in the chain use matching output and input key and
value classes as no conversion is done by the chaining code.
<p/>
Using the ChainMapper and the ChainReducer classes it is possible to compose
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO.
<p/>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain.
<p/>
ChainMapper usage pattern:
<p/>
<pre>
...
conf.setJobName("chain");
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
<p/>
JobConf mapAConf = new JobConf(false);
...
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
Text.class, Text.class, true, mapAConf);
<p/>
JobConf mapBConf = new JobConf(false);
...
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, mapBConf);
<p/>
JobConf reduceConf = new JobConf(false);
...
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
Text.class, Text.class, true, reduceConf);
<p/>
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, null);
<p/>
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
LongWritable.class, LongWritable.class, true, null);
<p/>
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
...
<p/>
JobClient jc = new JobClient(conf);
RunningJob job = jc.submitJob(conf);
...
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.ChainMapper -->
<!-- start class org.apache.hadoop.mapred.lib.ChainReducer -->
<class name="ChainReducer" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="ChainReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructor.]]>
</doc>
</constructor>
<method name="setReducer"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="byValue" type="boolean"/>
<param name="reducerConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Sets the Reducer class to the chain job's JobConf.
<p/>
It has to be specified how key and values are passed from one element of
the chain to the next, by value or by reference. If a Reducer leverages the
assumed semantics that the key and values are not modified by the collector
'by value' must be used. If the Reducer does not expect this semantics, as
an optimization to avoid serialization and deserialization 'by reference'
can be used.
<p/>
For the added Reducer the configuration given for it,
<code>reducerConf</code>, has precedence over the job's JobConf. This
precedence is in effect when the task is running.
<p/>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainReducer, this is done by the setReducer or the addMapper for the last
element in the chain.
@param job job's JobConf to add the Reducer class.
@param klass the Reducer class to add.
@param inputKeyClass reducer input key class.
@param inputValueClass reducer input value class.
@param outputKeyClass reducer output key class.
@param outputValueClass reducer output value class.
@param byValue indicates if key/values should be passed by value
to the next Mapper in the chain, if any.
@param reducerConf a JobConf with the configuration for the Reducer
class. It is recommended to use a JobConf without default values using the
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
</doc>
</method>
<method name="addMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="klass" type="java.lang.Class"/>
<param name="inputKeyClass" type="java.lang.Class"/>
<param name="inputValueClass" type="java.lang.Class"/>
<param name="outputKeyClass" type="java.lang.Class"/>
<param name="outputValueClass" type="java.lang.Class"/>
<param name="byValue" type="boolean"/>
<param name="mapperConf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Adds a Mapper class to the chain job's JobConf.
<p/>
It has to be specified how key and values are passed from one element of
the chain to the next, by value or by reference. If a Mapper leverages the
assumed semantics that the key and values are not modified by the collector
'by value' must be used. If the Mapper does not expect this semantics, as
an optimization to avoid serialization and deserialization 'by reference'
can be used.
<p/>
For the added Mapper the configuration given for it,
<code>mapperConf</code>, has precedence over the job's JobConf. This
precedence is in effect when the task is running.
<p/>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainMapper, this is done by the addMapper for the last mapper in the chain.
@param job chain job's JobConf to add the Mapper class.
@param klass the Mapper class to add.
@param inputKeyClass mapper input key class.
@param inputValueClass mapper input value class.
@param outputKeyClass mapper output key class.
@param outputValueClass mapper output value class.
@param byValue indicates if key/values should be passed by value
to the next Mapper in the chain, if any.
@param mapperConf a JobConf with the configuration for the Mapper
class. It is recommended to use a JobConf without default values using the
<code>JobConf(boolean loadDefaults)</code> constructor with FALSE.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Configures the ChainReducer, the Reducer and all the Mappers in the chain.
<p/>
If this method is overridden <code>super.configure(...)</code> should be
invoked at the beginning of the overriding method.]]>
</doc>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Chains the <code>reduce(...)</code> method of the Reducer with the
<code>map(...) </code> methods of the Mappers in the chain.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Closes the ChainReducer, the Reducer and all the Mappers in the chain.
<p/>
If this method is overridden <code>super.close()</code> should be
invoked at the end of the overriding method.]]>
</doc>
</method>
<doc>
<![CDATA[The ChainReducer class allows chaining multiple Mapper classes after a
Reducer within the Reducer task.
<p/>
For each record output by the Reducer, the Mapper classes are invoked in a
chained (or piped) fashion, the output of the first becomes the input of the
second, and so on until the last Mapper, the output of the last Mapper will
be written to the task's output.
<p/>
The key functionality of this feature is that the Mappers in the chain do not
need to be aware that they are executed after the Reducer or in a chain.
This enables having reusable specialized Mappers that can be combined to
perform composite operations within a single task.
<p/>
Special care has to be taken when creating chains that the key/values output
by a Mapper are valid for the following Mapper in the chain. It is assumed
all Mappers and the Reduce in the chain use matching output and input key and
value classes as no conversion is done by the chaining code.
<p/>
Using the ChainMapper and the ChainReducer classes it is possible to compose
Map/Reduce jobs that look like <code>[MAP+ / REDUCE MAP*]</code>. An
immediate benefit of this pattern is a dramatic reduction in disk IO.
<p/>
IMPORTANT: There is no need to specify the output key/value classes for the
ChainReducer, this is done by the setReducer or the addMapper for the last
element in the chain.
<p/>
ChainReducer usage pattern:
<p/>
<pre>
...
conf.setJobName("chain");
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
<p/>
JobConf mapAConf = new JobConf(false);
...
ChainMapper.addMapper(conf, AMap.class, LongWritable.class, Text.class,
Text.class, Text.class, true, mapAConf);
<p/>
JobConf mapBConf = new JobConf(false);
...
ChainMapper.addMapper(conf, BMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, mapBConf);
<p/>
JobConf reduceConf = new JobConf(false);
...
ChainReducer.setReducer(conf, XReduce.class, LongWritable.class, Text.class,
Text.class, Text.class, true, reduceConf);
<p/>
ChainReducer.addMapper(conf, CMap.class, Text.class, Text.class,
LongWritable.class, Text.class, false, null);
<p/>
ChainReducer.addMapper(conf, DMap.class, LongWritable.class, Text.class,
LongWritable.class, LongWritable.class, true, null);
<p/>
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
...
<p/>
JobClient jc = new JobClient(conf);
RunningJob job = jc.submitJob(conf);
...
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.ChainReducer -->
<!-- start class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
<class name="CombineFileInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="CombineFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[default constructor]]>
</doc>
</constructor>
<method name="setMaxSplitSize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="maxSplitSize" type="long"/>
<doc>
<![CDATA[Specify the maximum size (in bytes) of each split. Each split is
approximately equal to the specified size.]]>
</doc>
</method>
<method name="setMinSplitSizeNode"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="minSplitSizeNode" type="long"/>
<doc>
<![CDATA[Specify the minimum size (in bytes) of each split per node.
This applies to data that is left over after combining data on a single
node into splits that are of maximum size specified by maxSplitSize.
This leftover data will be combined into its own split if its size
exceeds minSplitSizeNode.]]>
</doc>
</method>
<method name="setMinSplitSizeRack"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="minSplitSizeRack" type="long"/>
<doc>
<![CDATA[Specify the minimum size (in bytes) of each split per rack.
This applies to data that is left over after combining data on a single
rack into splits that are of maximum size specified by maxSplitSize.
This leftover data will be combined into its own split if its size
exceeds minSplitSizeRack.]]>
</doc>
</method>
<method name="createPool"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="filters" type="java.util.List"/>
<doc>
<![CDATA[Create a new pool and add the filters to it.
A split cannot have files from different pools.]]>
</doc>
</method>
<method name="createPool"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="filters" type="org.apache.hadoop.fs.PathFilter[]"/>
<doc>
<![CDATA[Create a new pool and add the filters to it.
A pathname can satisfy any one of the specified filters.
A split cannot have files from different pools.]]>
</doc>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[This is not implemented yet.]]>
</doc>
</method>
<doc>
<![CDATA[An abstract {@link org.apache.hadoop.mapred.InputFormat} that returns {@link CombineFileSplit}'s
in {@link org.apache.hadoop.mapred.InputFormat#getSplits(JobConf, int)} method.
Splits are constructed from the files under the input paths.
A split cannot have files from different pools.
Each split returned may contain blocks from different files.
If a maxSplitSize is specified, then blocks on the same node are
combined to form a single split. Blocks that are left over are
then combined with other blocks in the same rack.
If maxSplitSize is not specified, then blocks from the same rack
are combined in a single split; no attempt is made to create
node-local splits.
If the maxSplitSize is equal to the block size, then this class
is similar to the default splitting behaviour in Hadoop: each
block is a locally processed split.
Subclasses implement {@link org.apache.hadoop.mapred.InputFormat#getRecordReader(InputSplit, JobConf, Reporter)}
to construct <code>RecordReader</code>'s for <code>CombineFileSplit</code>'s.
@see CombineFileSplit]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineFileInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
<class name="CombineFileRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="CombineFileRecordReader" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.mapred.lib.CombineFileSplit, org.apache.hadoop.mapred.Reporter, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[A generic RecordReader that can hand out different recordReaders
for each chunk in the CombineFileSplit.]]>
</doc>
</constructor>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createKey" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="createValue" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[return the amount of data processed]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[return progress based on the amount of data processed so far.]]>
</doc>
</method>
<method name="initNextRecordReader" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the record reader for the next chunk in this CombineFileSplit.]]>
</doc>
</method>
<field name="split" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="jc" type="org.apache.hadoop.mapred.JobConf"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="reporter" type="org.apache.hadoop.mapred.Reporter"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="rrClass" type="java.lang.Class"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="rrConstructor" type="java.lang.reflect.Constructor"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="fs" type="org.apache.hadoop.fs.FileSystem"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="idx" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="progress" type="long"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<field name="curReader" type="org.apache.hadoop.mapred.RecordReader"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A generic RecordReader that can hand out different recordReaders
for each chunk in a {@link CombineFileSplit}.
A CombineFileSplit can combine data chunks from multiple files.
This class allows using different RecordReaders for processing
these data chunks from different files.
@see CombineFileSplit]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineFileRecordReader -->
<!-- start class org.apache.hadoop.mapred.lib.CombineFileSplit -->
<class name="CombineFileSplit" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<constructor name="CombineFileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[default constructor]]>
</doc>
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[], long[], java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.JobConf, org.apache.hadoop.fs.Path[], long[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="CombineFileSplit" type="org.apache.hadoop.mapred.lib.CombineFileSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Copy constructor]]>
</doc>
</constructor>
<method name="getJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getStartOffsets" return="long[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns an array containing the startoffsets of the files in the split]]>
</doc>
</method>
<method name="getLengths" return="long[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns an array containing the lengths of the files in the split]]>
</doc>
</method>
<method name="getOffset" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Returns the start offset of the i<sup>th</sup> Path]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Returns the length of the i<sup>th</sup> Path]]>
</doc>
</method>
<method name="getNumPaths" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the number of Paths in the split]]>
</doc>
</method>
<method name="getPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="i" type="int"/>
<doc>
<![CDATA[Returns the i<sup>th</sup> Path]]>
</doc>
</method>
<method name="getPaths" return="org.apache.hadoop.fs.Path[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns all the Paths in the split]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Returns all the Paths where this input-split resides]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A sub-collection of input files. Unlike {@link org.apache.hadoop.mapred.FileSplit},
CombineFileSplit class does not represent a split of a file, but a split of input files
into smaller sets. A split may contain blocks from different files but all
the blocks in the same split are probably local to some rack <br>
CombineFileSplit can be used to implement {@link org.apache.hadoop.mapred.RecordReader}'s,
with reading one record per file.
@see org.apache.hadoop.mapred.FileSplit
@see CombineFileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.CombineFileSplit -->
<!-- start class org.apache.hadoop.mapred.lib.DelegatingInputFormat -->
<class name="DelegatingInputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<constructor name="DelegatingInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} that delegates behaviour of paths to multiple other
InputFormats.
@see MultipleInputs#addInputPath(JobConf, Path, Class, Class)]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.DelegatingInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.DelegatingMapper -->
<class name="DelegatingMapper" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="DelegatingMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="outputCollector" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A {@link Mapper} that delegates behaviour of paths to multiple other
mappers.
@see MultipleInputs#addInputPath(JobConf, Path, Class, Class)]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.DelegatingMapper -->
<!-- start class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
<class name="FieldSelectionMapReduce" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="FieldSelectionMapReduce"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The identity function. Input key/value pair is written directly to output.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements a mapper/reducer class that can be used to perform
field selections in a manner similar to unix cut. The input data is treated
as fields separated by a user specified separator (the default value is
"\t"). The user can specify a list of fields that form the map output keys,
and a list of fields that form the map output values. If the inputformat is
TextInputFormat, the mapper will ignore the key to the map function, and the
fields are from the value only. Otherwise, the fields are the union of those
from the key and those from the value.
The field separator is under attribute "mapred.data.field.separator"
The map output field list spec is under attribute "map.output.key.value.fields.spec".
The value is expected to be like "keyFieldsSpec:valueFieldsSpec"
key/valueFieldsSpec are comma (,) separated field spec: fieldSpec,fieldSpec,fieldSpec ...
Each field spec can be a simple number (e.g. 5) specifying a specific field, or a range
(like 2-5) to specify a range of fields, or an open range (like 3-) specifying all
the fields starting from field 3. The open range field spec applies to value fields only.
It has no effect on the key fields.
Here is an example: "4,3,0,1:6,5,1-3,7-". It specifies to use fields 4,3,0 and 1 for keys,
and use fields 6,5,1,2,3,7 and above for values.
The reduce output field list spec is under attribute "reduce.output.key.value.fields.spec".
The reducer extracts output key/value pairs in a similar manner, except that
the key is never ignored.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.FieldSelectionMapReduce -->
<!-- start class org.apache.hadoop.mapred.lib.HashPartitioner -->
<class name="HashPartitioner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use
{@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead.">
<implements name="org.apache.hadoop.mapred.Partitioner"/>
<constructor name="HashPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="numReduceTasks" type="int"/>
<doc>
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
</doc>
</method>
<doc>
<![CDATA[Partition keys by their {@link Object#hashCode()}.
@deprecated Use
{@link org.apache.hadoop.mapreduce.lib.partition.HashPartitioner} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.HashPartitioner -->
<!-- start class org.apache.hadoop.mapred.lib.IdentityMapper -->
<class name="IdentityMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Mapper} instead.">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="IdentityMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The identity function. Input key/value pair is written directly to
output.]]>
</doc>
</method>
<doc>
<![CDATA[Implements the identity function, mapping inputs directly to outputs.
@deprecated Use {@link org.apache.hadoop.mapreduce.Mapper} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.IdentityMapper -->
<!-- start class org.apache.hadoop.mapred.lib.IdentityReducer -->
<class name="IdentityReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.Reducer} instead.">
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="IdentityReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Writes all keys and values directly to output.]]>
</doc>
</method>
<doc>
<![CDATA[Performs no reduction, writing all input values directly to the output.
@deprecated Use {@link org.apache.hadoop.mapreduce.Reducer} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.IdentityReducer -->
<!-- start class org.apache.hadoop.mapred.lib.InputSampler -->
<class name="InputSampler" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="InputSampler" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="writePartitionFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="sampler" type="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write a partition file for the given job, using the Sampler provided.
Queries the sampler for a sample keyset, sorts by the output key
comparator, selects the keys for each rank, and writes to the destination
returned from {@link
org.apache.hadoop.mapred.lib.TotalOrderPartitioner#getPartitionFile}.]]>
</doc>
</method>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<doc>
<![CDATA[Driver for InputSampler from the command line.
Configures a JobConf instance and calls {@link #writePartitionFile}.]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<doc>
<![CDATA[Utility for collecting samples and writing a partition file for
{@link org.apache.hadoop.mapred.lib.TotalOrderPartitioner}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.InputSampler -->
<!-- start class org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler -->
<class name="InputSampler.IntervalSampler" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
<constructor name="InputSampler.IntervalSampler" type="double"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new IntervalSampler sampling <em>all</em> splits.
@param freq The frequency with which records will be emitted.]]>
</doc>
</constructor>
<constructor name="InputSampler.IntervalSampler" type="double, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new IntervalSampler.
@param freq The frequency with which records will be emitted.
@param maxSplitsSampled The maximum number of splits to examine.
@see #getSample]]>
</doc>
</constructor>
<method name="getSample" return="java.lang.Object[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For each split sampled, emit when the ratio of the number of records
retained to the total record count is less than the specified
frequency.]]>
</doc>
</method>
<doc>
<![CDATA[Sample from s splits at regular intervals.
Useful for sorted data.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.InputSampler.IntervalSampler -->
<!-- start class org.apache.hadoop.mapred.lib.InputSampler.RandomSampler -->
<class name="InputSampler.RandomSampler" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
<constructor name="InputSampler.RandomSampler" type="double, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new RandomSampler sampling <em>all</em> splits.
This will read every split at the client, which is very expensive.
@param freq Probability with which a key will be chosen.
@param numSamples Total number of samples to obtain from all selected
splits.]]>
</doc>
</constructor>
<constructor name="InputSampler.RandomSampler" type="double, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new RandomSampler.
@param freq Probability with which a key will be chosen.
@param numSamples Total number of samples to obtain from all selected
splits.
@param maxSplitsSampled The maximum number of splits to examine.]]>
</doc>
</constructor>
<method name="getSample" return="java.lang.Object[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Randomize the split order, then take the specified number of keys from
each split sampled, where each key is selected with the specified
probability and possibly replaced by a subsequently selected key when
the quota of keys from that split is satisfied.]]>
</doc>
</method>
<doc>
<![CDATA[Sample from random points in the input.
General-purpose sampler. Takes numSamples / maxSplitsSampled inputs from
each split.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.InputSampler.RandomSampler -->
<!-- start interface org.apache.hadoop.mapred.lib.InputSampler.Sampler -->
<interface name="InputSampler.Sampler" abstract="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<method name="getSample" return="java.lang.Object[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For a given job, collect and return a subset of the keys from the
input data.]]>
</doc>
</method>
<doc>
<![CDATA[Interface to sample using an {@link org.apache.hadoop.mapred.InputFormat}.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.lib.InputSampler.Sampler -->
<!-- start class org.apache.hadoop.mapred.lib.InputSampler.SplitSampler -->
<class name="InputSampler.SplitSampler" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.InputSampler.Sampler"/>
<constructor name="InputSampler.SplitSampler" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a SplitSampler sampling <em>all</em> splits.
Takes the first numSamples / numSplits records from each split.
@param numSamples Total number of samples to obtain from all selected
splits.]]>
</doc>
</constructor>
<constructor name="InputSampler.SplitSampler" type="int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create a new SplitSampler.
@param numSamples Total number of samples to obtain from all selected
splits.
@param maxSplitsSampled The maximum number of splits to examine.]]>
</doc>
</constructor>
<method name="getSample" return="java.lang.Object[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="inf" type="org.apache.hadoop.mapred.InputFormat"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[From each split sampled, take the first numSamples / numSplits records.]]>
</doc>
</method>
<doc>
<![CDATA[Samples the first n records from s splits.
Inexpensive way to sample random data.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.InputSampler.SplitSampler -->
<!-- start class org.apache.hadoop.mapred.lib.InverseMapper -->
<class name="InverseMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper}
instead.">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="InverseMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[The inverse function. Input keys and values are swapped.]]>
</doc>
</method>
<doc>
<![CDATA[A {@link Mapper} that swaps keys and values.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.map.InverseMapper}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.InverseMapper -->
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
<class name="KeyFieldBasedComparator" extends="org.apache.hadoop.io.WritableComparator"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="KeyFieldBasedComparator"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="compare" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="b1" type="byte[]"/>
<param name="s1" type="int"/>
<param name="l1" type="int"/>
<param name="b2" type="byte[]"/>
<param name="s2" type="int"/>
<param name="l2" type="int"/>
</method>
<doc>
<![CDATA[This comparator implementation provides a subset of the features provided
by the Unix/GNU Sort. In particular, the supported features are:
-n, (Sort numerically)
-r, (Reverse the result of comparison)
-k pos1[,pos2], where pos is of the form f[.c][opts], where f is the number
of the field to use, and c is the number of the first character from the
beginning of the field. Fields and character posns are numbered starting
with 1; a character position of zero in pos2 indicates the field's last
character. If '.c' is omitted from pos1, it defaults to 1 (the beginning
of the field); if omitted from pos2, it defaults to 0 (the end of the
field). opts are ordering options (any of 'nr' as described above).
We assume that the fields in the key are separated by
map.output.key.field.separator.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedComparator -->
<!-- start class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
<class name="KeyFieldBasedPartitioner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Partitioner"/>
<constructor name="KeyFieldBasedPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="numReduceTasks" type="int"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="b" type="byte[]"/>
<param name="start" type="int"/>
<param name="end" type="int"/>
<param name="currentHash" type="int"/>
</method>
<doc>
<![CDATA[Defines a way to partition keys based on certain key fields (also see
{@link KeyFieldBasedComparator}).
The key specification supported is of the form -k pos1[,pos2], where,
pos is of the form f[.c][opts], where f is the number
of the key field to use, and c is the number of the first character from
the beginning of the field. Fields and character posns are numbered
starting with 1; a character position of zero in pos2 indicates the
field's last character. If '.c' is omitted from pos1, it defaults to 1
(the beginning of the field); if omitted from pos2, it defaults to 0
(the end of the field).]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner -->
<!-- start class org.apache.hadoop.mapred.lib.LongSumReducer -->
<class name="LongSumReducer" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer}
instead.">
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="LongSumReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A {@link Reducer} that sums long values.
@deprecated Use {@link org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer}
instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.LongSumReducer -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleInputs -->
<class name="MultipleInputs" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleInputs"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFormatClass" type="java.lang.Class"/>
<doc>
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} to the list of
inputs for the map-reduce job.
@param conf The configuration of the job
@param path {@link Path} to be added to the list of inputs for the job
@param inputFormatClass {@link InputFormat} class to use for this path]]>
</doc>
</method>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<param name="inputFormatClass" type="java.lang.Class"/>
<param name="mapperClass" type="java.lang.Class"/>
<doc>
<![CDATA[Add a {@link Path} with a custom {@link InputFormat} and
{@link Mapper} to the list of inputs for the map-reduce job.
@param conf The configuration of the job
@param path {@link Path} to be added to the list of inputs for the job
@param inputFormatClass {@link InputFormat} class to use for this path
@param mapperClass {@link Mapper} class to use for this path]]>
</doc>
</method>
<doc>
<![CDATA[This class supports MapReduce jobs that have multiple input paths with
a different {@link InputFormat} and {@link Mapper} for each path]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleInputs -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
<class name="MultipleOutputFormat" extends="org.apache.hadoop.mapred.FileOutputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a composite record writer that can write key/value data to different
output files
@param fs
the file system to use
@param job
the job conf for the job
@param name
the leaf file name for the output file (such as "part-00000")
@param arg3
a progressable for reporting progress.
@return a composite record writer
@throws IOException]]>
</doc>
</method>
<method name="generateLeafFileName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Generate the leaf name for the output file name. The default behavior does
not change the leaf file name (such as part-00000)
@param name
the leaf file name for the output file
@return the given leaf file name]]>
</doc>
</method>
<method name="generateFileNameForKeyValue" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Generate the output file name based on the given key and the leaf file
name. The default behavior is that the file name does not depend on the
key.
@param key
the key of the output data
@param name
the leaf file name
@return generated file name]]>
</doc>
</method>
<method name="generateActualKey" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<doc>
<![CDATA[Generate the actual key from the given key/value. The default behavior is that
the actual key is equal to the given key
@param key
the key of the output data
@param value
the value of the output data
@return the actual key derived from the given key/value]]>
</doc>
</method>
<method name="generateActualValue" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<doc>
<![CDATA[Generate the actual value from the given key and value. The default behavior is that
the actual value is equal to the given value
@param key
the key of the output data
@param value
the value of the output data
@return the actual value derived from the given key/value]]>
</doc>
</method>
<method name="getInputFileBasedOutputFileName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<doc>
<![CDATA[Generate the outfile name based on a given name and the input file name. If
the map input file does not exist (i.e. this is not for a map only job),
the given name is returned unchanged. If the config value for
"num.of.trailing.legs.to.use" is not set, or is set to 0 or negative, the given
name is returned unchanged. Otherwise, return a file name consisting of the
N trailing legs of the input file name where N is the config value for
"num.of.trailing.legs.to.use".
@param job
the job config
@param name
the output file name
@return the outfile name based on a given name and the input file name.]]>
</doc>
</method>
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@param fs
the file system to use
@param job
a job conf object
@param name
the name of the file over which a record writer object will be
constructed
@param arg3
a progressable object
@return A RecordWriter object over the given file
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[This abstract class extends the FileOutputFormat, allowing to write the
output data to different output files. There are three basic use cases for
this class.
Case one: This class is used for a map reduce job with at least one reducer.
The reducer wants to write data to different files depending on the actual
keys. It is assumed that a key (or value) encodes the actual key (value)
and the desired location for the actual key (value).
Case two: This class is used for a map only job. The job wants to use an
output file name that is either a part of the input file name of the input
data, or some derivation of it.
Case three: This class is used for a map only job. The job wants to use an
output file name that depends on both the keys and the input file name.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleOutputs -->
<class name="MultipleOutputs" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleOutputs" type="org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Creates and initializes multiple named outputs support, it should be
instantiated in the Mapper/Reducer configure method.
@param job the job configuration object]]>
</doc>
</constructor>
<method name="getNamedOutputsList" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Returns list of channel names.
@param conf job conf
@return List of channel Names]]>
</doc>
</method>
<method name="isMultiNamedOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns if a named output is multiple.
@param conf job conf
@param namedOutput named output
@return <code>true</code> if the named output is multi, <code>false</code>
if it is single. If the named output is not defined it returns
<code>false</code>]]>
</doc>
</method>
<method name="getNamedOutputFormatClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns the named output OutputFormat.
@param conf job conf
@param namedOutput named output
@return namedOutput OutputFormat]]>
</doc>
</method>
<method name="getNamedOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns the key class for a named output.
@param conf job conf
@param namedOutput named output
@return class for the named output key]]>
</doc>
</method>
<method name="getNamedOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<doc>
<![CDATA[Returns the value class for a named output.
@param conf job conf
@param namedOutput named output
@return class of named output value]]>
</doc>
</method>
<method name="addNamedOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<param name="outputFormatClass" type="java.lang.Class"/>
<param name="keyClass" type="java.lang.Class"/>
<param name="valueClass" type="java.lang.Class"/>
<doc>
<![CDATA[Adds a named output for the job.
<p/>
@param conf job conf to add the named output
@param namedOutput named output name, it has to be a word, letters
and numbers only, cannot be the word 'part' as
that is reserved for the
default output.
@param outputFormatClass OutputFormat class.
@param keyClass key class
@param valueClass value class]]>
</doc>
</method>
<method name="addMultiNamedOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="namedOutput" type="java.lang.String"/>
<param name="outputFormatClass" type="java.lang.Class"/>
<param name="keyClass" type="java.lang.Class"/>
<param name="valueClass" type="java.lang.Class"/>
<doc>
<![CDATA[Adds a multi named output for the job.
<p/>
@param conf job conf to add the named output
@param namedOutput named output name, it has to be a word, letters
and numbers only, cannot be the word 'part' as
that is reserved for the
default output.
@param outputFormatClass OutputFormat class.
@param keyClass key class
@param valueClass value class]]>
</doc>
</method>
<method name="setCountersEnabled"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="enabled" type="boolean"/>
<doc>
<![CDATA[Enables or disables counters for the named outputs.
<p/>
By default these counters are disabled.
<p/>
MultipleOutputs supports counters; by default they are disabled.
The counters group is the {@link MultipleOutputs} class name.
<p/>
The names of the counters are the same as the named outputs. For multi
named outputs the name of the counter is the concatenation of the named
output, an underscore '_' and the multiname.
@param conf job conf in which to enable or disable the counters.
@param enabled indicates if the counters will be enabled or not.]]>
</doc>
</method>
<method name="getCountersEnabled" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Returns if the counters for the named outputs are enabled or not.
<p/>
By default these counters are disabled.
<p/>
MultipleOutputs supports counters; by default they are disabled.
The counters group is the {@link MultipleOutputs} class name.
<p/>
The names of the counters are the same as the named outputs. For multi
named outputs the name of the counter is the concatenation of the named
output, an underscore '_' and the multiname.
@param conf job conf to read the counters setting from.
@return TRUE if the counters are enabled, FALSE if they are disabled.]]>
</doc>
</method>
<method name="getNamedOutputs" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns an iterator over the defined named outputs.
@return iterator with the defined named outputs]]>
</doc>
</method>
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="namedOutput" type="java.lang.String"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the output collector for a named output.
<p/>
@param namedOutput the named output name
@param reporter the reporter
@return the output collector for the given named output
@throws IOException thrown if output collector could not be created]]>
</doc>
</method>
<method name="getCollector" return="org.apache.hadoop.mapred.OutputCollector"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="namedOutput" type="java.lang.String"/>
<param name="multiName" type="java.lang.String"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the output collector for a multi named output.
<p/>
@param namedOutput the named output name
@param multiName the multi name part
@param reporter the reporter
@return the output collector for the given named output
@throws IOException thrown if output collector could not be created]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Closes all the opened named outputs.
<p/>
If overridden, subclasses must invoke <code>super.close()</code> at the
end of their <code>close()</code> method.
@throws java.io.IOException thrown if any of the MultipleOutput files
could not be closed properly.]]>
</doc>
</method>
<doc>
<![CDATA[The MultipleOutputs class simplifies writing to additional outputs other
than the job default output via the <code>OutputCollector</code> passed to
the <code>map()</code> and <code>reduce()</code> methods of the
<code>Mapper</code> and <code>Reducer</code> implementations.
<p/>
Each additional output, or named output, may be configured with its own
<code>OutputFormat</code>, with its own key class and with its own value
class.
<p/>
A named output can be a single file or a multi file. The latter is referred
to as a multi named output.
<p/>
A multi named output is an unbounded set of files all sharing the same
<code>OutputFormat</code>, key class and value class configuration.
<p/>
When named outputs are used within a <code>Mapper</code> implementation,
key/values written to a named output are not part of the reduce phase, only
key/values written to the job <code>OutputCollector</code> are part of the
reduce phase.
<p/>
MultipleOutputs supports counters; by default they are disabled. The counters
group is the {@link MultipleOutputs} class name.
<p/>
The names of the counters are the same as the named outputs. For multi
named outputs the name of the counter is the concatenation of the named
output, an underscore '_' and the multiname.
<p/>
Job configuration usage pattern is:
<pre>
JobConf conf = new JobConf();
conf.setInputPath(inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setMapperClass(MOMap.class);
conf.setReducerClass(MOReduce.class);
...
// Defines additional single text based output 'text' for the job
MultipleOutputs.addNamedOutput(conf, "text", TextOutputFormat.class,
LongWritable.class, Text.class);
// Defines additional multi sequencefile based output 'seq' for the
// job
MultipleOutputs.addMultiNamedOutput(conf, "seq",
SequenceFileOutputFormat.class,
LongWritable.class, Text.class);
...
JobClient jc = new JobClient();
RunningJob job = jc.submitJob(conf);
...
</pre>
<p/>
Usage pattern in a Reducer is:
<pre>
public class MOReduce implements
Reducer&lt;WritableComparable, Writable&gt; {
private MultipleOutputs mos;
public void configure(JobConf conf) {
...
mos = new MultipleOutputs(conf);
}
public void reduce(WritableComparable key, Iterator&lt;Writable&gt; values,
OutputCollector output, Reporter reporter)
throws IOException {
...
mos.getCollector("text", reporter).collect(key, new Text("Hello"));
mos.getCollector("seq", "A", reporter).collect(key, new Text("Bye"));
mos.getCollector("seq", "B", reporter).collect(key, new Text("Chau"));
...
}
public void close() throws IOException {
mos.close();
...
}
}
</pre>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleOutputs -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
<class name="MultipleSequenceFileOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleSequenceFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class extends the MultipleOutputFormat, allowing the output data
to be written to different output files in sequence file output format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleSequenceFileOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
<class name="MultipleTextOutputFormat" extends="org.apache.hadoop.mapred.lib.MultipleOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultipleTextOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getBaseRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="fs" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="arg3" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[This class extends the MultipleOutputFormat, allowing the output
data to be written to different output files in Text output format.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultipleTextOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
<class name="MultithreadedMapRunner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.MapRunnable"/>
<constructor name="MultithreadedMapRunner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobConf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="input" type="org.apache.hadoop.mapred.RecordReader"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapred.MapRunnable}.
<p>
It can be used instead of the default implementation,
{@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU
bound in order to improve throughput.
<p>
Map implementations using this MapRunnable must be thread-safe.
<p>
The Map-Reduce job has to be configured to use this MapRunnable class (using
the JobConf.setMapRunnerClass method) and
the number of threads the thread-pool can use with the
<code>mapred.map.multithreadedrunner.threads</code> property; its default
value is 10 threads.
<p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.MultithreadedMapRunner -->
<!-- start class org.apache.hadoop.mapred.lib.NLineInputFormat -->
<class name="NLineInputFormat" extends="org.apache.hadoop.mapred.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="NLineInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="numSplits" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Logically splits the set of input files for the job, splits N lines
of the input as one split.
@see org.apache.hadoop.mapred.FileInputFormat#getSplits(JobConf, int)]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<doc>
<![CDATA[NLineInputFormat which splits N lines of input as one split.
In many "pleasantly" parallel applications, each process/mapper
processes the same input file(s), but the computations are
controlled by different parameters (referred to as "parameter sweeps").
One way to achieve this is to specify a set of parameters
(one set per line) as input in a control file
(which is the input path to the map-reduce application,
whereas the input dataset is specified
via a config variable in JobConf).
The NLineInputFormat can be used in such applications; it splits
the input file such that by default, one line is fed as
a value to one map task, and the key is the offset,
i.e. (k,v) is (LongWritable, Text).
The location hints will span the whole mapred cluster.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.NLineInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.NullOutputFormat -->
<class name="NullOutputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use
{@link org.apache.hadoop.mapreduce.lib.output.NullOutputFormat} instead.">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="NullOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="ignored" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<doc>
<![CDATA[Consume all outputs and put them in /dev/null.
@deprecated Use
{@link org.apache.hadoop.mapreduce.lib.output.NullOutputFormat} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.NullOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.RegexMapper -->
<class name="RegexMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="RegexMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.RegexMapper -->
<!-- start class org.apache.hadoop.mapred.lib.TokenCountMapper -->
<class name="TokenCountMapper" extends="org.apache.hadoop.mapred.MapReduceBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="Use
{@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<constructor name="TokenCountMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A {@link Mapper} that maps text values into &lt;token,freq&gt; pairs. Uses
{@link StringTokenizer} to break text into tokens.
@deprecated Use
{@link org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper} instead.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.TokenCountMapper -->
<!-- start class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
<class name="TotalOrderPartitioner" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Partitioner"/>
<constructor name="TotalOrderPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Read in the partition file and build indexing data structures.
If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
<tt>total.order.partitioner.natural.order</tt> is not false, a trie
of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
will be built. Otherwise, keys will be located using a binary search of
the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
defined for this job. The input file must be sorted with the same
comparator and contain {@link
org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.]]>
</doc>
</method>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<param name="value" type="java.lang.Object"/>
<param name="numPartitions" type="int"/>
</method>
<method name="setPartitionFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="p" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the path to the SequenceFile storing the sorted partition keyset.
It must be the case that for <tt>R</tt> reduces, there are <tt>R-1</tt>
keys in the SequenceFile.]]>
</doc>
</method>
<method name="getPartitionFile" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the path to the SequenceFile storing the sorted partition keyset.
@see #setPartitionFile(JobConf,Path)]]>
</doc>
</method>
<field name="DEFAULT_PATH" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[Partitioner effecting a total order by reading split points from
an externally generated source.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.TotalOrderPartitioner -->
</package>
<package name="org.apache.hadoop.mapred.lib.aggregate">
<!-- start class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
<class name="DoubleValueSum" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="DoubleValueSum"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a double value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="double"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a double value.]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="getSum" return="double"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that sums up a sequence of double
values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.DoubleValueSum -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
<class name="LongValueMax" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueMax"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a long value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newVal" type="long"/>
<doc>
<![CDATA[add a value to the aggregator
@param newVal
a long value.]]>
</doc>
</method>
<method name="getVal" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the maximum of
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMax -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
<class name="LongValueMin" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueMin"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a long value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="newVal" type="long"/>
<doc>
<![CDATA[add a value to the aggregator
@param newVal
a long value.]]>
</doc>
</method>
<method name="getVal" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the minimum of
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueMin -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
<class name="LongValueSum" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="LongValueSum"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object whose string representation represents a long value.]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="long"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a long value.]]>
</doc>
</method>
<method name="getSum" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that sums up
a sequence of long values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.LongValueSum -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
<class name="StringValueMax" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="StringValueMax"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a string.]]>
</doc>
</method>
<method name="getVal" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the biggest of
a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMax -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
<class name="StringValueMin" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="StringValueMin"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
a string.]]>
</doc>
</method>
<method name="getVal" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the aggregated value]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregated value]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of one element. The element is a string
representation of the aggregated value. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that maintains the smallest of
a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.StringValueMin -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
<class name="UniqValueCount" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="UniqValueCount"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[the default constructor]]>
</doc>
</constructor>
<constructor name="UniqValueCount" type="long"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[constructor
@param maxNum the limit on the number of unique values to keep.]]>
</doc>
</constructor>
<method name="setMaxItems" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="n" type="long"/>
<doc>
<![CDATA[Set the limit on the number of unique values
@param n the desired limit on the number of unique values
@return the new limit on the number of unique values]]>
</doc>
</method>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val
an object.]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the number of unique objects aggregated]]>
</doc>
</method>
<method name="getUniqueItems" return="java.util.Set"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the set of the unique objects]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of the unique objects. The return value is
expected to be used by a combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that dedupes a sequence of objects.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UniqValueCount -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
<class name="UserDefinedValueAggregatorDescriptor" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
<constructor name="UserDefinedValueAggregatorDescriptor" type="java.lang.String, org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@param className the class name of the user defined descriptor class
@param job a configuration object used for descriptor configuration]]>
</doc>
</constructor>
<method name="createInstance" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="className" type="java.lang.String"/>
<doc>
<![CDATA[Create an instance of the given class
@param className the name of the class
@return a dynamically created instance of the given class]]>
</doc>
</method>
<method name="generateKeyValPairs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[Generate a list of aggregation-id/value pairs for the given key/value pairs
by delegating the invocation to the real object.
@param key
input key
@param val
input value
@return a list of aggregation id/value pairs. An aggregation id encodes an
aggregation type which is used to guide the way to aggregate the
value in the reduce/combiner phase of an Aggregate based job.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of this object.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Do nothing.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a wrapper for a user defined value aggregator descriptor.
It serves two functions: One is to create an object of ValueAggregatorDescriptor from the
name of a user defined class that may be dynamically loaded. The other is to
delegate invocations of the generateKeyValPairs function to the created object.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.UserDefinedValueAggregatorDescriptor -->
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
<interface name="ValueAggregator" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add a value to the aggregator
@param val the value to be added]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of the aggregator]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return an array of values as the outputs of the combiner.]]>
</doc>
</method>
<doc>
<![CDATA[This interface defines the minimal protocol for value aggregators.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregator -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
<class name="ValueAggregatorBaseDescriptor" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor"/>
<constructor name="ValueAggregatorBaseDescriptor"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="generateEntry" return="java.util.Map.Entry"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="java.lang.String"/>
<param name="id" type="java.lang.String"/>
<param name="val" type="org.apache.hadoop.io.Text"/>
<doc>
<![CDATA[@param type the aggregation type
@param id the aggregation id
@param val the val associated with the id to be aggregated
@return an Entry whose key is the aggregation id prefixed with
the aggregation type.]]>
</doc>
</method>
<method name="generateValueAggregator" return="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="type" type="java.lang.String"/>
<doc>
<![CDATA[@param type the aggregation type
@return a value aggregator of the given type.]]>
</doc>
</method>
<method name="generateKeyValPairs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[Generate 1 or 2 aggregation-id/value pairs for the given key/value pair.
The first id will be of type LONG_VALUE_SUM, with "record_count" as
its aggregation id. If the input is a file split,
the second id of the same type will be generated too, with the file name
as its aggregation id. This achieves the behavior of counting the total number
of records in the input data, and the number of records in each input file.
@param key
input key
@param val
input value
@return a list of aggregation id/value pairs. An aggregation id encodes an
aggregation type which is used to guide the way to aggregate the
value in the reduce/combiner phase of an Aggregate based job.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[get the input file name.
@param job a job configuration object]]>
</doc>
</method>
<field name="UNIQ_VALUE_COUNT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_SUM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="DOUBLE_VALUE_SUM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="VALUE_HISTOGRAM" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_MAX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="LONG_VALUE_MIN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="STRING_VALUE_MAX" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="STRING_VALUE_MIN" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="inputFile" type="java.lang.String"
transient="false" volatile="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This class implements the common functionalities of
the subclasses of ValueAggregatorDescriptor class.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorBaseDescriptor -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
<class name="ValueAggregatorCombiner" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorCombiner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Combiner does not need to configure.]]>
</doc>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Combines values for a given key.
@param key the key is expected to be a Text object, whose prefix indicates
the type of aggregation to aggregate the values.
@param values the values to combine
@param output to collect combined values]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing.]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="org.apache.hadoop.io.WritableComparable"/>
<param name="arg1" type="org.apache.hadoop.io.Writable"/>
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing. Should not be called.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic combiner of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorCombiner -->
<!-- start interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
<interface name="ValueAggregatorDescriptor" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="generateKeyValPairs" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[Generate a list of aggregation-id/value pairs for the given key/value pair.
This function is usually called by the mapper of an Aggregate based job.
@param key
input key
@param val
input value
@return a list of aggregation id/value pairs. An aggregation id encodes an
aggregation type which is used to guide the way to aggregate the
value in the reduce/combiner phase of an Aggregate based job.]]>
</doc>
</method>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Configure the object
@param job
a JobConf object that may contain the information that can be used
to configure the object.]]>
</doc>
</method>
<field name="TYPE_SEPARATOR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="ONE" type="org.apache.hadoop.io.Text"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This interface defines the contract a value aggregator descriptor must
support. Such a descriptor can be configured with a JobConf object. Its main
function is to generate a list of aggregation-id/value pairs. An aggregation
id encodes an aggregation type which is used to guide the way to aggregate
the value in the reduce/combiner phase of an Aggregate based job. The mapper in
an Aggregate based map/reduce job may create one or more of
ValueAggregatorDescriptor objects at configuration time. For each input
key/value pair, the mapper will use those objects to create aggregation
id/value pairs.]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorDescriptor -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
<class name="ValueAggregatorJob" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorJob"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="descriptors" type="java.lang.Class[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createValueAggregatorJobs" return="org.apache.hadoop.mapred.jobcontrol.JobControl"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create an Aggregate based map/reduce job.
@param args the arguments used for job creation. Generic hadoop
arguments are accepted.
@return a JobConf object ready for submission.
@throws IOException
@see GenericOptionsParser]]>
</doc>
</method>
<method name="createValueAggregatorJob" return="org.apache.hadoop.mapred.JobConf"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<param name="descriptors" type="java.lang.Class[]"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setAggregatorDescriptors"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="descriptors" type="java.lang.Class[]"/>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[create and run an Aggregate based map/reduce job.
@param args the arguments used for job creation
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[This is the main class for creating a map/reduce job using Aggregate
framework. The Aggregate is a specialization of map/reduce framework,
specializing for performing various simple aggregations.
Generally speaking, in order to implement an application using Map/Reduce
model, the developer is to implement Map and Reduce functions (and possibly
combine function). However, a lot of applications related to counting and
statistics computing have very similar characteristics. Aggregate abstracts
out the general patterns of these functions and implements those patterns.
In particular, the package provides generic mapper/reducer/combiner classes,
and a set of built-in value aggregators, and a generic utility class that
helps user create map/reduce jobs using the generic class. The built-in
aggregators include:
sum over numeric values count the number of distinct values compute the
histogram of values compute the minimum, maximum, median, average, standard
deviation of numeric values
The developer using Aggregate will need only to provide a plugin class
conforming to the following interface:
public interface ValueAggregatorDescriptor { public ArrayList<Entry>
generateKeyValPairs(Object key, Object value); public void
configure(JobConf job); }
The package also provides a base class, ValueAggregatorBaseDescriptor,
implementing the above interface. The user can extend the base class and
implement generateKeyValPairs accordingly.
The primary work of generateKeyValPairs is to emit one or more key/value
pairs based on the input key/value pair. The key in an output key/value pair
encode two pieces of information: aggregation type and aggregation id. The
value will be aggregated onto the aggregation id according to the aggregation
type.
This class offers a function to generate a map/reduce job using Aggregate
framework. The function takes the following parameters: input directory spec
input format (text or sequence file) output directory a file specifying the
user plugin class]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJob -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
<class name="ValueAggregatorJobBase" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.Mapper"/>
<implements name="org.apache.hadoop.mapred.Reducer"/>
<constructor name="ValueAggregatorJobBase"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<method name="logSpec"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="aggregatorDescriptorList" type="java.util.ArrayList"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[This abstract class implements some common functionalities of the
generic mapper, reducer and combiner classes of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
<class name="ValueAggregatorMapper" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<param name="value" type="org.apache.hadoop.io.Writable"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[the map function. It iterates through the value aggregator descriptor
list to generate aggregation id/value pairs and emit them.]]>
</doc>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="org.apache.hadoop.io.Text"/>
<param name="arg1" type="java.util.Iterator"/>
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing. Should not be called.]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic mapper of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorMapper -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
<class name="ValueAggregatorReducer" extends="org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorJobBase"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ValueAggregatorReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.Text"/>
<param name="values" type="java.util.Iterator"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@param key
the key is expected to be a Text object, whose prefix indicates
the type of aggregation to aggregate the values. In effect, data
driven computing is achieved. It is assumed that each aggregator's
getReport method emits appropriate output for the aggregator. This
may be further customized.
@param values the values to be aggregated]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="org.apache.hadoop.io.WritableComparable"/>
<param name="arg1" type="org.apache.hadoop.io.Writable"/>
<param name="arg2" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="arg3" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Do nothing. Should not be called]]>
</doc>
</method>
<doc>
<![CDATA[This class implements the generic reducer of Aggregate.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueAggregatorReducer -->
<!-- start class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
<class name="ValueHistogram" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.aggregate.ValueAggregator"/>
<constructor name="ValueHistogram"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="addNextValue"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="val" type="java.lang.Object"/>
<doc>
<![CDATA[add the given val to the aggregator.
@param val the value to be added. It is expected to be a string
in the form of xxxx\tnum, meaning xxxx has num occurrences.]]>
</doc>
</method>
<method name="getReport" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return the string representation of this aggregator.
It includes the following basic statistics of the histogram:
the number of unique values
the minimum value
the median value
the maximum value
the average value
the standard deviation]]>
</doc>
</method>
<method name="getReportDetails" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return a string representation of the list of value/frequency pairs of
the histogram]]>
</doc>
</method>
<method name="getCombinerOutput" return="java.util.ArrayList"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return a list of value/frequency pairs.
The return value is expected to be used by the reducer.]]>
</doc>
</method>
<method name="getReportItems" return="java.util.TreeMap"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return a TreeMap representation of the histogram]]>
</doc>
</method>
<method name="reset"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[reset the aggregator]]>
</doc>
</method>
<doc>
<![CDATA[This class implements a value aggregator that computes the
histogram of a sequence of strings.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.aggregate.ValueHistogram -->
</package>
<package name="org.apache.hadoop.mapred.lib.db">
<!-- start class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
<class name="DBConfiguration" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="configureDB"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="driverClass" type="java.lang.String"/>
<param name="dbUrl" type="java.lang.String"/>
<param name="userName" type="java.lang.String"/>
<param name="passwd" type="java.lang.String"/>
<doc>
<![CDATA[Sets the DB access related fields in the JobConf.
@param job the job
@param driverClass JDBC Driver class name
@param dbUrl JDBC DB access URL.
@param userName DB access username
@param passwd DB access password]]>
</doc>
</method>
<method name="configureDB"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="driverClass" type="java.lang.String"/>
<param name="dbUrl" type="java.lang.String"/>
<doc>
<![CDATA[Sets the DB access related fields in the JobConf.
@param job the job
@param driverClass JDBC Driver class name
@param dbUrl JDBC DB access URL.]]>
</doc>
</method>
<field name="DRIVER_CLASS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The JDBC Driver class name]]>
</doc>
</field>
<field name="URL_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[JDBC Database access URL]]>
</doc>
</field>
<field name="USERNAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[User name to access the database]]>
</doc>
</field>
<field name="PASSWORD_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Password to access the database]]>
</doc>
</field>
<field name="INPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Input table name]]>
</doc>
</field>
<field name="INPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Field names in the Input table]]>
</doc>
</field>
<field name="INPUT_CONDITIONS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[WHERE clause in the input SELECT statement]]>
</doc>
</field>
<field name="INPUT_ORDER_BY_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[ORDER BY clause in the input SELECT statement]]>
</doc>
</field>
<field name="INPUT_QUERY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Whole input query, excluding LIMIT...OFFSET]]>
</doc>
</field>
<field name="INPUT_COUNT_QUERY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Input query to get the count of records]]>
</doc>
</field>
<field name="INPUT_CLASS_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Class name implementing DBWritable which will hold input tuples]]>
</doc>
</field>
<field name="OUTPUT_TABLE_NAME_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Output table name]]>
</doc>
</field>
<field name="OUTPUT_FIELD_NAMES_PROPERTY" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Field names in the Output table]]>
</doc>
</field>
<doc>
<![CDATA[A container for configuration property names for jobs with DB input/output.
<br>
The job can be configured using the static methods in this class,
{@link DBInputFormat}, and {@link DBOutputFormat}.
<p>
Alternatively, the properties can be set in the configuration with proper
values.
@see DBConfiguration#configureDB(JobConf, String, String, String, String)
@see DBInputFormat#setInput(JobConf, Class, String, String)
@see DBInputFormat#setInput(JobConf, Class, String, String, String, String...)
@see DBOutputFormat#setOutput(JobConf, String, String...)]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBConfiguration -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
<class name="DBInputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputFormat"/>
<implements name="org.apache.hadoop.mapred.JobConfigurable"/>
<constructor name="DBInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="configure"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getRecordReader" return="org.apache.hadoop.mapred.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapred.InputSplit"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getSplits" return="org.apache.hadoop.mapred.InputSplit[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="chunks" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getCountQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the query for getting the total number of rows,
subclasses can override this for custom behaviour.]]>
</doc>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="tableName" type="java.lang.String"/>
<param name="conditions" type="java.lang.String"/>
<param name="orderBy" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
@param job The job
@param inputClass the class object implementing DBWritable, which is the
Java object holding tuple fields.
@param tableName The table to read data from
@param conditions The condition with which to select data, e.g. '(updated >
20070101 AND length > 0)'
@param orderBy the fieldNames in the orderBy clause.
@param fieldNames The field names in the table
@see #setInput(JobConf, Class, String, String)]]>
</doc>
</method>
<method name="setInput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="inputClass" type="java.lang.Class"/>
<param name="inputQuery" type="java.lang.String"/>
<param name="inputCountQuery" type="java.lang.String"/>
<doc>
<![CDATA[Initializes the map-part of the job with the appropriate input settings.
@param job The job
@param inputClass the class object implementing DBWritable, which is the
Java object holding tuple fields.
@param inputQuery the input query to select fields. Example :
"SELECT f1, f2, f3 FROM Mytable ORDER BY f1"
@param inputCountQuery the input query that returns the number of records in
the table.
Example : "SELECT COUNT(f1) FROM Mytable"
@see #setInput(JobConf, Class, String, String, String, String...)]]>
</doc>
</method>
<doc>
<![CDATA[An InputFormat that reads input data from an SQL table.
<p>
DBInputFormat emits LongWritables containing the record number as
key and DBWritables as value.
The SQL query and input class can be set using one of the two
setInput methods.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit -->
<class name="DBInputFormat.DBInputSplit" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.InputSplit"/>
<constructor name="DBInputFormat.DBInputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Default Constructor]]>
</doc>
</constructor>
<constructor name="DBInputFormat.DBInputSplit" type="long, long"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Convenience Constructor
@param start the index of the first row to select
@param end the index of the last row to select]]>
</doc>
</constructor>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getStart" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return The index of the first row to select]]>
</doc>
</method>
<method name="getEnd" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[@return The index of the last row to select]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[@return The total row count in this split]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="input" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="output" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<doc>
<![CDATA[An InputSplit that spans a set of rows]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader -->
<class name="DBInputFormat.DBRecordReader" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordReader"/>
<constructor name="DBInputFormat.DBRecordReader" type="org.apache.hadoop.mapred.lib.db.DBInputFormat.DBInputSplit, java.lang.Class, org.apache.hadoop.mapred.JobConf"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[@param split The InputSplit to read data for
@throws SQLException]]>
</doc>
</constructor>
<method name="getSelectQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the query for selecting the records,
subclasses can override this for custom behaviour.]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="createKey" return="org.apache.hadoop.io.LongWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="createValue" return="org.apache.hadoop.mapred.lib.db.DBWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getPos" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="next" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.LongWritable"/>
<param name="value" type="org.apache.hadoop.mapred.lib.db.DBWritable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<doc>
<![CDATA[A RecordReader that reads records from a SQL table.
Emits LongWritables containing the record number as
key and DBWritables as value.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.DBRecordReader -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable -->
<class name="DBInputFormat.NullDBWritable" extends="java.lang.Object"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.lib.db.DBWritable"/>
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="DBInputFormat.NullDBWritable"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="java.sql.ResultSet"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="arg0" type="java.sql.PreparedStatement"/>
<exception name="SQLException" type="java.sql.SQLException"/>
</method>
<doc>
<![CDATA[A Class that does nothing, implementing DBWritable]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBInputFormat.NullDBWritable -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
<class name="DBOutputFormat" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.OutputFormat"/>
<constructor name="DBOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="constructQuery" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="table" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Constructs the query used as the prepared statement to insert data.
@param table
the table to insert into
@param fieldNames
the fields to insert into. If field names are unknown, supply an
array of nulls.]]>
</doc>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapred.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="filesystem" type="org.apache.hadoop.fs.FileSystem"/>
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="name" type="java.lang.String"/>
<param name="progress" type="org.apache.hadoop.util.Progressable"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="setOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
<param name="tableName" type="java.lang.String"/>
<param name="fieldNames" type="java.lang.String[]"/>
<doc>
<![CDATA[Initializes the reduce-part of the job with the appropriate output settings
@param job
The job
@param tableName
The table to insert data into
@param fieldNames
The field names in the table. If unknown, supply the appropriate
number of nulls.]]>
</doc>
</method>
<doc>
<![CDATA[An OutputFormat that sends the reduce output to a SQL table.
<p>
{@link DBOutputFormat} accepts &lt;key,value&gt; pairs, where
key has a type extending DBWritable. Returned {@link RecordWriter}
writes <b>only the key</b> to the database with a batch SQL query.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat -->
<!-- start class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter -->
<class name="DBOutputFormat.DBRecordWriter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="org.apache.hadoop.mapred.RecordWriter"/>
<constructor name="DBOutputFormat.DBRecordWriter" type="java.sql.Connection, java.sql.PreparedStatement"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<exception name="SQLException" type="java.sql.SQLException"/>
</constructor>
<method name="close"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.mapred.lib.db.DBWritable"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[{@inheritDoc}]]>
</doc>
</method>
<doc>
<![CDATA[A RecordWriter that writes the reduce output to a SQL table]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.lib.db.DBOutputFormat.DBRecordWriter -->
<!-- start interface org.apache.hadoop.mapred.lib.db.DBWritable -->
<interface name="DBWritable" abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="statement" type="java.sql.PreparedStatement"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[Sets the fields of the object in the {@link PreparedStatement}.
@param statement the statement that the fields are put into.
@throws SQLException]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="resultSet" type="java.sql.ResultSet"/>
<exception name="SQLException" type="java.sql.SQLException"/>
<doc>
<![CDATA[Reads the fields of the object from the {@link ResultSet}.
@param resultSet the {@link ResultSet} to get the fields from.
@throws SQLException]]>
</doc>
</method>
<doc>
<![CDATA[Objects that are read from/written to a database should implement
<code>DBWritable</code>. DBWritable is similar to {@link Writable}
except that the {@link #write(PreparedStatement)} method takes a
{@link PreparedStatement}, and {@link #readFields(ResultSet)}
takes a {@link ResultSet}.
<p>
Implementations are responsible for writing the fields of the object
to PreparedStatement, and reading the fields of the object from the
ResultSet.
<p>Example:</p>
If we have the following table in the database :
<pre>
CREATE TABLE MyTable (
counter INTEGER NOT NULL,
timestamp BIGINT NOT NULL
);
</pre>
then we can read/write the tuples from/to the table with :
<p><pre>
public class MyWritable implements Writable, DBWritable {
// Some data
private int counter;
private long timestamp;
//Writable#write() implementation
public void write(DataOutput out) throws IOException {
out.writeInt(counter);
out.writeLong(timestamp);
}
//Writable#readFields() implementation
public void readFields(DataInput in) throws IOException {
counter = in.readInt();
timestamp = in.readLong();
}
public void write(PreparedStatement statement) throws SQLException {
statement.setInt(1, counter);
statement.setLong(2, timestamp);
}
public void readFields(ResultSet resultSet) throws SQLException {
counter = resultSet.getInt(1);
timestamp = resultSet.getLong(2);
}
}
</pre></p>]]>
</doc>
</interface>
<!-- end interface org.apache.hadoop.mapred.lib.db.DBWritable -->
</package>
<package name="org.apache.hadoop.mapred.pipes">
<!-- start class org.apache.hadoop.mapred.pipes.Submitter -->
<class name="Submitter" extends="org.apache.hadoop.conf.Configured"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="Submitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="Submitter" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getExecutable" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Get the URI of the application's executable.
@param conf
@return the URI where the application's executable is located]]>
</doc>
</method>
<method name="setExecutable"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="executable" type="java.lang.String"/>
<doc>
<![CDATA[Set the URI for the application's executable. Normally this is an hdfs:
location.
@param conf
@param executable The URI of the application's executable.]]>
</doc>
</method>
<method name="setIsJavaRecordReader"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the job is using a Java RecordReader.
@param conf the configuration to modify
@param value the new value]]>
</doc>
</method>
<method name="getIsJavaRecordReader" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Check whether the job is using a Java RecordReader
@param conf the configuration to check
@return is it a Java RecordReader?]]>
</doc>
</method>
<method name="setIsJavaMapper"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the Mapper is written in Java.
@param conf the configuration to modify
@param value the new value]]>
</doc>
</method>
<method name="getIsJavaMapper" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Check whether the job is using a Java Mapper.
@param conf the configuration to check
@return is it a Java Mapper?]]>
</doc>
</method>
<method name="setIsJavaReducer"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the Reducer is written in Java.
@param conf the configuration to modify
@param value the new value]]>
</doc>
</method>
<method name="getIsJavaReducer" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Check whether the job is using a Java Reducer.
@param conf the configuration to check
@return is it a Java Reducer?]]>
</doc>
</method>
<method name="setIsJavaRecordWriter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="value" type="boolean"/>
<doc>
<![CDATA[Set whether the job will use a Java RecordWriter.
@param conf the configuration to modify
@param value the new value to set]]>
</doc>
</method>
<method name="getIsJavaRecordWriter" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Will the reduce use a Java RecordWriter?
@param conf the configuration to check
@return true, if the output of the job will be written by Java]]>
</doc>
</method>
<method name="getKeepCommandFile" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<doc>
<![CDATA[Does the user want to keep the command file for debugging? If this is
true, pipes will write a copy of the command data to a file in the
task directory named "downlink.data", which may be used to run the C++
program under the debugger. You probably also want to set
JobConf.setKeepFailedTaskFiles(true) to keep the entire directory from
being deleted.
To run using the data file, set the environment variable
"hadoop.pipes.command.file" to point to the file.
@param conf the configuration to check
@return will the framework save the command file?]]>
</doc>
</method>
<method name="setKeepCommandFile"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<param name="keep" type="boolean"/>
<doc>
<![CDATA[Set whether to keep the command file for debugging
@param conf the configuration to modify
@param keep the new value]]>
</doc>
</method>
<method name="submitJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="Use {@link Submitter#runJob(JobConf)}">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
to the job to run under pipes are made to the configuration.
@param conf the job to submit to the cluster (MODIFIED)
@throws IOException
@deprecated Use {@link Submitter#runJob(JobConf)}]]>
</doc>
</method>
<method name="runJob" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the map/reduce cluster. All of the necessary modifications
to the job to run under pipes are made to the configuration.
@param conf the job to submit to the cluster (MODIFIED)
@throws IOException]]>
</doc>
</method>
<method name="jobSubmit" return="org.apache.hadoop.mapred.RunningJob"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.mapred.JobConf"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Submit a job to the Map-Reduce framework.
This returns a handle to the {@link RunningJob} which can be used to track
the running-job.
@param conf the job configuration.
@return a handle to the {@link RunningJob} which can be used to track the
running-job.
@throws IOException]]>
</doc>
</method>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<doc>
<![CDATA[Submit a pipes job based on the command line arguments.
@param args]]>
</doc>
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[The main entry point and job submitter. It may either be used as a command
line-based or API-based method to launch Pipes jobs.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.pipes.Submitter -->
</package>
<package name="org.apache.hadoop.mapred.tools">
<!-- start class org.apache.hadoop.mapred.tools.MRAdmin -->
<class name="MRAdmin" extends="org.apache.hadoop.conf.Configured"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="MRAdmin"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="MRAdmin" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<doc>
<![CDATA[Administrative access to Hadoop Map-Reduce.
Currently it only provides the ability to connect to the {@link JobTracker}
and refresh the service-level authorization policy.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapred.tools.MRAdmin -->
</package>
<package name="org.apache.hadoop.mapreduce">
<!-- start class org.apache.hadoop.mapreduce.Counter -->
<class name="Counter" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="Counter"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<constructor name="Counter" type="java.lang.String, java.lang.String"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="setDisplayName"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="displayName" type="java.lang.String"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read the binary representation of the counter]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write the binary representation of the counter]]>
</doc>
</method>
<method name="getName" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getDisplayName" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the display name of the counter.
@return the user facing name of the counter]]>
</doc>
</method>
<method name="getValue" return="long"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[What is the current value of this counter?
@return the current value]]>
</doc>
</method>
<method name="increment"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="incr" type="long"/>
<doc>
<![CDATA[Increment this counter by the given value
@param incr the value to increase this counter by]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericRight" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A named counter that tracks the progress of a map/reduce job.
<p><code>Counters</code> represent global counters, defined either by the
Map-Reduce framework or applications. Each <code>Counter</code> is named by
an {@link Enum} and has a long for the value.</p>
<p><code>Counters</code> are bunched into Groups, each comprising
counters from a particular <code>Enum</code> class.</p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Counter -->
<!-- start class org.apache.hadoop.mapreduce.CounterGroup -->
<class name="CounterGroup" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<constructor name="CounterGroup" type="java.lang.String"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<constructor name="CounterGroup" type="java.lang.String, java.lang.String"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="getName" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the internal name of the group
@return the internal name]]>
</doc>
</method>
<method name="getDisplayName" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the display name of the group.
@return the human readable name]]>
</doc>
</method>
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
<param name="displayName" type="java.lang.String"/>
<doc>
<![CDATA[Internal method to find or add a counter in a group.
@param counterName the name of the counter
@param displayName the display name of the counter
@return the counter that was found or added]]>
</doc>
</method>
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.String"/>
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="size" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the number of counters in this group.]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericRight" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="incrAllCounters"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="rightGroup" type="org.apache.hadoop.mapreduce.CounterGroup"/>
</method>
<doc>
<![CDATA[A group of {@link Counter}s that logically belong together. Typically,
it is an {@link Enum} subclass and the counters are the values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.CounterGroup -->
<!-- start class org.apache.hadoop.mapreduce.Counters -->
<class name="Counters" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<implements name="java.lang.Iterable"/>
<constructor name="Counters"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<param name="counterName" type="java.lang.String"/>
</method>
<method name="findCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Enum"/>
<doc>
<![CDATA[Find the counter for the given enum. The same enum will always return the
same counter.
@param key the counter key
@return the matching counter object]]>
</doc>
</method>
<method name="getGroupNames" return="java.util.Collection"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the names of all counter classes.
@return Collection of counter group names.]]>
</doc>
</method>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getGroup" return="org.apache.hadoop.mapreduce.CounterGroup"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<doc>
<![CDATA[Returns the named counter group, or an empty group if there is none
with the specified name.]]>
</doc>
</method>
<method name="countCounters" return="int"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the total number of counters, by summing the number of counters
in each group.]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Write the set of groups.
The external format is:
#groups (groupName group)*
i.e. the number of groups followed by 0 or more groups, where each
group is of the form:
groupDisplayName #counters (false | true counter)*
where each counter is of the form:
name (false | true displayName) value]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Read a set of groups.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return textual representation of the counter values.]]>
</doc>
</method>
<method name="incrAllCounters"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="other" type="org.apache.hadoop.mapreduce.Counters"/>
<doc>
<![CDATA[Increments multiple counters by their amounts in another Counters
instance.
@param other the other Counters instance]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericRight" type="java.lang.Object"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.Counters -->
<!-- start class org.apache.hadoop.mapreduce.ID -->
<class name="ID" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.WritableComparable"/>
<constructor name="ID" type="int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[constructs an ID object from the given int]]>
</doc>
</constructor>
<constructor name="ID"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="getId" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[returns the int which represents the identifier]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="that" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare IDs by associated numbers]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="SEPARATOR" type="char"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="id" type="int"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A general identifier, which internally stores the id
as an integer. This is the super class of {@link JobID},
{@link TaskID} and {@link TaskAttemptID}.
@see JobID
@see TaskID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.ID -->
<!-- start class org.apache.hadoop.mapreduce.InputFormat -->
<class name="InputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getSplits" return="java.util.List"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Logically split the set of input files for the job.
<p>Each {@link InputSplit} is then assigned to an individual {@link Mapper}
for processing.</p>
<p><i>Note</i>: The split is a <i>logical</i> split of the inputs and the
input files are not physically split into chunks. For example, a split could
be an <i>&lt;input-file-path, start, offset&gt;</i> tuple. The InputFormat
also creates the {@link RecordReader} to read the {@link InputSplit}.</p>
@param context job configuration.
@return a list of {@link InputSplit}s for the job.]]>
</doc>
</method>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Create a record reader for a given split. The framework will call
{@link RecordReader#initialize(InputSplit, TaskAttemptContext)} before
the split is used.
@param split the split to be read
@param context the information about the task
@return a new record reader
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputFormat</code> describes the input-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>InputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the input-specification of the job.
</li>
<li>
Split-up the input file(s) into logical {@link InputSplit}s, each of
which is then assigned to an individual {@link Mapper}.
</li>
<li>
Provide the {@link RecordReader} implementation to be used to glean
input records from the logical <code>InputSplit</code> for processing by
the {@link Mapper}.
</li>
</ol>
<p>The default behavior of file-based {@link InputFormat}s, typically
sub-classes of {@link FileInputFormat}, is to split the
input into <i>logical</i> {@link InputSplit}s based on the total size, in
bytes, of the input files. However, the {@link FileSystem} blocksize of
the input files is treated as an upper bound for input splits. A lower bound
on the split size can be set via
<a href="{@docRoot}/../mapred-default.html#mapred.min.split.size">
mapred.min.split.size</a>.</p>
<p>Clearly, logical splits based on input-size are insufficient for many
applications since record boundaries are to be respected. In such cases, the
application has to also implement a {@link RecordReader} on whom lies the
responsibility to respect record-boundaries and present a record-oriented
view of the logical <code>InputSplit</code> to the individual task.</p>
@see InputSplit
@see RecordReader
@see FileInputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.InputFormat -->
<!-- start class org.apache.hadoop.mapreduce.InputSplit -->
<class name="InputSplit" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getLength" return="long"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the size of the split, so that the input splits can be sorted by size.
@return the number of bytes in the split
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the list of nodes by name where the data for the split would be local.
The locations do not need to be serialized.
@return a new array of the node names.
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<doc>
<![CDATA[<code>InputSplit</code> represents the data to be processed by an
individual {@link Mapper}.
<p>Typically, it presents a byte-oriented view on the input and it is the
responsibility of the {@link RecordReader} of the job to process this and present
a record-oriented view.</p>
@see InputFormat
@see RecordReader]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.InputSplit -->
<!-- start class org.apache.hadoop.mapreduce.Job -->
<class name="Job" extends="org.apache.hadoop.mapreduce.JobContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Job"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<constructor name="Job" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<constructor name="Job" type="org.apache.hadoop.conf.Configuration, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="setNumReduceTasks"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="tasks" type="int"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the number of reduce tasks for the job.
@param tasks the number of reduce tasks
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setWorkingDirectory"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="dir" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the current working directory for the default file system.
@param dir the new current working directory.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setInputFormatClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link InputFormat} for the job.
@param cls the <code>InputFormat</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setOutputFormatClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link OutputFormat} for the job.
@param cls the <code>OutputFormat</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setMapperClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link Mapper} for the job.
@param cls the <code>Mapper</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setJarByClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<doc>
<![CDATA[Set the Jar by finding where a given class came from.
@param cls the example class]]>
</doc>
</method>
<method name="getJar" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the pathname of the job's jar.
@return the pathname]]>
</doc>
</method>
<method name="setCombinerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the combiner class for the job.
@param cls the combiner to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setReducerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link Reducer} for the job.
@param cls the <code>Reducer</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setPartitionerClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the {@link Partitioner} for the job.
@param cls the <code>Partitioner</code> to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setMapOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the key class for the map output data. This allows the user to
specify the map output key class to be different than the final output
key class.
@param theClass the map output key class.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setMapOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the value class for the map output data. This allows the user to
specify the map output value class to be different than the final output
value class.
@param theClass the map output value class.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setOutputKeyClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the key class for the job output data.
@param theClass the key class for the job output data.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setOutputValueClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="theClass" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the value class for job outputs.
@param theClass the value class for job outputs.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setSortComparatorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Define the comparator that controls how the keys are sorted before they
are passed to the {@link Reducer}.
@param cls the raw comparator
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setGroupingComparatorClass"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="cls" type="java.lang.Class"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Define the comparator that controls which keys are grouped together
for a single call to
{@link Reducer#reduce(Object, Iterable,
org.apache.hadoop.mapreduce.Reducer.Context)}
@param cls the raw comparator to use
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="setJobName"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
<exception name="IllegalStateException" type="java.lang.IllegalStateException"/>
<doc>
<![CDATA[Set the user-specified job name.
@param name the job's new name.
@throws IllegalStateException if the job is submitted]]>
</doc>
</method>
<method name="getTrackingURL" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the URL where some job progress information will be displayed.
@return the URL where some job progress information will be displayed.]]>
</doc>
</method>
<method name="mapProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's map-tasks, as a float between 0.0
and 1.0. When all map tasks have completed, the function returns 1.0.
@return the progress of the job's map-tasks.
@throws IOException]]>
</doc>
</method>
<method name="reduceProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the <i>progress</i> of the job's reduce-tasks, as a float between 0.0
and 1.0. When all reduce tasks have completed, the function returns 1.0.
@return the progress of the job's reduce-tasks.
@throws IOException]]>
</doc>
</method>
<method name="isComplete" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job is finished or not.
This is a non-blocking call.
@return <code>true</code> if the job is complete, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="isSuccessful" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check if the job completed successfully.
@return <code>true</code> if the job succeeded, else <code>false</code>.
@throws IOException]]>
</doc>
</method>
<method name="killJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill the running job. Blocks until all job tasks have been
killed as well. If the job is no longer running, it simply returns.
@throws IOException]]>
</doc>
</method>
<method name="getTaskCompletionEvents" return="org.apache.hadoop.mapred.TaskCompletionEvent[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="startFrom" type="int"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get events indicating completion (success/failure) of component tasks.
@param startFrom index to start fetching events from
@return an array of {@link TaskCompletionEvent}s
@throws IOException]]>
</doc>
</method>
<method name="killTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Kill indicated task attempt.
@param taskId the id of the task to be terminated.
@throws IOException]]>
</doc>
</method>
<method name="failTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskId" type="org.apache.hadoop.mapreduce.TaskAttemptID"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Fail indicated task attempt.
@param taskId the id of the task to be terminated.
@throws IOException]]>
</doc>
</method>
<method name="getCounters" return="org.apache.hadoop.mapreduce.Counters"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Gets the counters for this job.
@return the counters for this job.
@throws IOException]]>
</doc>
</method>
<method name="submit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Submit the job to the cluster and return immediately.
@throws IOException]]>
</doc>
</method>
<method name="waitForCompletion" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="verbose" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Submit the job to the cluster and wait for it to finish.
@param verbose print the progress to the user
@return true if the job succeeded
@throws IOException thrown if the communication with the
<code>JobTracker</code> is lost]]>
</doc>
</method>
<doc>
<![CDATA[The job submitter's view of the Job. It allows the user to configure the
job, submit it, control its execution, and query the state. The set methods
only work until the job is submitted, afterwards they will throw an
IllegalStateException.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Job -->
<!-- start class org.apache.hadoop.mapreduce.Job.JobState -->
<class name="Job.JobState" extends="java.lang.Enum"
abstract="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<method name="values" return="org.apache.hadoop.mapreduce.Job.JobState[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="valueOf" return="org.apache.hadoop.mapreduce.Job.JobState"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.String"/>
</method>
<field name="DEFINE" type="org.apache.hadoop.mapreduce.Job.JobState"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<field name="RUNNING" type="org.apache.hadoop.mapreduce.Job.JobState"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapreduce.Job.JobState -->
<!-- start class org.apache.hadoop.mapreduce.JobContext -->
<class name="JobContext" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="JobContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.JobID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getConfiguration" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Return the configuration for the job.
@return the shared configuration object]]>
</doc>
</method>
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the unique ID for the job.
@return the object with the job id]]>
</doc>
</method>
<method name="getNumReduceTasks" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the configured number of reduce tasks for this job. Defaults to
<code>1</code>.
@return the number of reduce tasks for this job.]]>
</doc>
</method>
<method name="getWorkingDirectory" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the current working directory for the default file system.
@return the directory name.]]>
</doc>
</method>
<method name="getOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the job output data.
@return the key class for the job output data.]]>
</doc>
</method>
<method name="getOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for job outputs.
@return the value class for job outputs.]]>
</doc>
</method>
<method name="getMapOutputKeyClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the key class for the map output data. If it is not set, use the
(final) output key class. This allows the map output key class to be
different than the final output key class.
@return the map output key class.]]>
</doc>
</method>
<method name="getMapOutputValueClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the value class for the map output data. If it is not set, use the
(final) output value class. This allows the map output value class to be
different than the final output value class.
@return the map output value class.]]>
</doc>
</method>
<method name="getJobName" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user-specified job name. This is only used to identify the
job to the user.
@return the job's name, defaulting to "".]]>
</doc>
</method>
<method name="getInputFormatClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link InputFormat} class for the job.
@return the {@link InputFormat} class for the job.]]>
</doc>
</method>
<method name="getMapperClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link Mapper} class for the job.
@return the {@link Mapper} class for the job.]]>
</doc>
</method>
<method name="getCombinerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the combiner class for the job.
@return the combiner class for the job.]]>
</doc>
</method>
<method name="getReducerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link Reducer} class for the job.
@return the {@link Reducer} class for the job.]]>
</doc>
</method>
<method name="getOutputFormatClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link OutputFormat} class for the job.
@return the {@link OutputFormat} class for the job.]]>
</doc>
</method>
<method name="getPartitionerClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="ClassNotFoundException" type="java.lang.ClassNotFoundException"/>
<doc>
<![CDATA[Get the {@link Partitioner} class for the job.
@return the {@link Partitioner} class for the job.]]>
</doc>
</method>
<method name="getSortComparator" return="org.apache.hadoop.io.RawComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the {@link RawComparator} comparator used to compare keys.
@return the {@link RawComparator} comparator used to compare keys.]]>
</doc>
</method>
<method name="getJar" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the pathname of the job's jar.
@return the pathname]]>
</doc>
</method>
<method name="getGroupingComparator" return="org.apache.hadoop.io.RawComparator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the user defined {@link RawComparator} comparator for
grouping keys of inputs to the reduce.
@return comparator set by the user for grouping values.
@see Job#setGroupingComparatorClass(Class) for details.]]>
</doc>
</method>
<field name="INPUT_FORMAT_CLASS_ATTR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="MAP_CLASS_ATTR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="COMBINE_CLASS_ATTR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="REDUCE_CLASS_ATTR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="OUTPUT_FORMAT_CLASS_ATTR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="PARTITIONER_CLASS_ATTR" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="conf" type="org.apache.hadoop.mapred.JobConf"
transient="false" volatile="false"
static="false" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A read-only view of the job that is provided to the tasks while they
are running.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.JobContext -->
<!-- start class org.apache.hadoop.mapreduce.JobID -->
<class name="JobID" extends="org.apache.hadoop.mapred.ID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.lang.Comparable"/>
<constructor name="JobID" type="java.lang.String, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a JobID object
@param jtIdentifier jobTracker identifier
@param id job number]]>
</doc>
</constructor>
<constructor name="JobID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getJtIdentifier" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare JobIds by first jtIdentifiers, then by job numbers]]>
</doc>
</method>
<method name="appendTo" return="java.lang.StringBuilder"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="builder" type="java.lang.StringBuilder"/>
<doc>
<![CDATA[Add the stuff after the "job" prefix to the given builder. This is useful,
because the sub-ids use this substring at the start of their string.
@param builder the builder to append to
@return the builder that was passed in]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a JobId object from given string
@return constructed JobId object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<field name="JOB" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="idFormat" type="java.text.NumberFormat"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[JobID represents the immutable and unique identifier for
the job. JobID consists of two parts. First part
represents the jobtracker identifier, so that jobID to jobtracker map
is defined. For cluster setup this string is the jobtracker
start time, for local setting, it is "local".
Second part of the JobID is the job number. <br>
An example JobID is :
<code>job_200707121733_0003</code> , which represents the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse JobID strings, but rather
use appropriate constructors or {@link #forName(String)} method.
@see TaskID
@see TaskAttemptID
@see org.apache.hadoop.mapred.JobTracker#getNewJobId()
@see org.apache.hadoop.mapred.JobTracker#getStartTime()]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.JobID -->
<!-- start class org.apache.hadoop.mapreduce.MapContext -->
<class name="MapContext" extends="org.apache.hadoop.mapreduce.TaskInputOutputContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MapContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapreduce.RecordReader, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.mapreduce.InputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getInputSplit" return="org.apache.hadoop.mapreduce.InputSplit"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the input split for this map.]]>
</doc>
</method>
<method name="getCurrentKey" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentValue" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<doc>
<![CDATA[The context that is given to the {@link Mapper}.
@param <KEYIN> the key input type to the Mapper
@param <VALUEIN> the value input type to the Mapper
@param <KEYOUT> the key output type from the Mapper
@param <VALUEOUT> the value output type from the Mapper]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.MapContext -->
<!-- start class org.apache.hadoop.mapreduce.Mapper -->
<class name="Mapper" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Mapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the beginning of the task.]]>
</doc>
</method>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once for each key/value pair in the input split. Most applications
should override this, but the default is the identity function.]]>
</doc>
</method>
<method name="cleanup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the end of the task.]]>
</doc>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Expert users can override this method for more complete control over the
execution of the Mapper.
@param context
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[Maps input key/value pairs to a set of intermediate key/value pairs.
<p>Maps are the individual tasks which transform input records into
intermediate records. The transformed intermediate records need not be of
the same type as the input records. A given input pair may map to zero or
many output pairs.</p>
<p>The Hadoop Map-Reduce framework spawns one map task for each
{@link InputSplit} generated by the {@link InputFormat} for the job.
<code>Mapper</code> implementations can access the {@link Configuration} for
the job via the {@link JobContext#getConfiguration()}.
<p>The framework first calls
{@link #setup(org.apache.hadoop.mapreduce.Mapper.Context)}, followed by
{@link #map(Object, Object, Context)}
for each key/value pair in the <code>InputSplit</code>. Finally
{@link #cleanup(Context)} is called.</p>
<p>All intermediate values associated with a given output key are
subsequently grouped by the framework, and passed to a {@link Reducer} to
determine the final output. Users can control the sorting and grouping by
specifying two key {@link RawComparator} classes.</p>
<p>The <code>Mapper</code> outputs are partitioned per
<code>Reducer</code>. Users can control which keys (and hence records) go to
which <code>Reducer</code> by implementing a custom {@link Partitioner}.
<p>Users can optionally specify a <code>combiner</code>, via
{@link Job#setCombinerClass(Class)}, to perform local aggregation of the
intermediate outputs, which helps to cut down the amount of data transferred
from the <code>Mapper</code> to the <code>Reducer</code>.
<p>Applications can specify if and how the intermediate
outputs are to be compressed and which {@link CompressionCodec}s are to be
used via the <code>Configuration</code>.</p>
<p>If the job has zero
reduces then the output of the <code>Mapper</code> is directly written
to the {@link OutputFormat} without sorting by keys.</p>
<p>Example:</p>
<p><blockquote><pre>
public class TokenCounterMapper
extends Mapper&lt;Object, Text, Text, IntWritable&gt;{
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
</pre></blockquote></p>
<p>Applications may override the {@link #run(Context)} method to exert
greater control on map processing e.g. multi-threaded <code>Mapper</code>s
etc.</p>
@see InputFormat
@see JobContext
@see Partitioner
@see Reducer]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Mapper -->
<!-- start class org.apache.hadoop.mapreduce.Mapper.Context -->
<class name="Mapper.Context" extends="org.apache.hadoop.mapreduce.MapContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Mapper.Context" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapreduce.RecordReader, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.mapreduce.InputSplit"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</constructor>
</class>
<!-- end class org.apache.hadoop.mapreduce.Mapper.Context -->
<!-- start class org.apache.hadoop.mapreduce.OutputCommitter -->
<class name="OutputCommitter" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OutputCommitter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setupJob"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For the framework to setup the job output during initialization
@param jobContext Context of the job whose output is being written.
@throws IOException if temporary output could not be created]]>
</doc>
</method>
<method name="cleanupJob"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="jobContext" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[For cleaning up the job's output after job completion
@param jobContext Context of the job whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="setupTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Sets up output for the task.
@param taskContext Context of the task whose output is being written.
@throws IOException]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Check whether task needs a commit
@param taskContext
@return true/false
@throws IOException]]>
</doc>
</method>
<method name="commitTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[To promote the task's temporary output to final output location
The task's output is moved to the job's output directory.
@param taskContext Context of the task whose output is being written.
@throws IOException if commit is not successful]]>
</doc>
</method>
<method name="abortTask"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="taskContext" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Discard the task output
@param taskContext
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputCommitter</code> describes the commit of task output for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputCommitter</code> of
the job to:</p>
<ol>
<li>
Setup the job during initialization. For example, create the temporary
output directory for the job during the initialization of the job.
</li>
<li>
Cleanup the job after the job completion. For example, remove the
temporary output directory after the job completion.
</li>
<li>
Setup the task temporary output.
</li>
<li>
Check whether a task needs a commit. This is to avoid the commit
procedure if a task does not need commit.
</li>
<li>
Commit of the task output.
</li>
<li>
Discard the task commit.
</li>
</ol>
@see org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
@see JobContext
@see TaskAttemptContext]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.OutputCommitter -->
<!-- start class org.apache.hadoop.mapreduce.OutputFormat -->
<class name="OutputFormat" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="OutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the {@link RecordWriter} for the given task.
@param context the information about the current task.
@return a {@link RecordWriter} to write the output for the job.
@throws IOException]]>
</doc>
</method>
<method name="checkOutputSpecs"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Check for validity of the output-specification for the job.
<p>This is to validate the output specification for the job when the
job is submitted. Typically checks that it does not already exist,
throwing an exception when it already exists, so that output is not
overwritten.</p>
@param context information about the job
@throws IOException when output should not be attempted]]>
</doc>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the output committer for this output format. This is responsible
for ensuring the output is committed correctly.
@param context the task context
@return an output committer
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<doc>
<![CDATA[<code>OutputFormat</code> describes the output-specification for a
Map-Reduce job.
<p>The Map-Reduce framework relies on the <code>OutputFormat</code> of the
job to:</p>
<ol>
<li>
Validate the output-specification of the job. For example, check that the
output directory doesn't already exist.
</li>
<li>
Provide the {@link RecordWriter} implementation to be used to write out
the output files of the job. Output files are stored in a
{@link FileSystem}.
</li>
</ol>
@see RecordWriter]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.OutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.Partitioner -->
<class name="Partitioner" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Partitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getPartition" return="int"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="numPartitions" type="int"/>
<doc>
<![CDATA[Get the partition number for a given key (hence record) given the total
number of partitions i.e. number of reduce-tasks for the job.
<p>Typically a hash function on all or a subset of the key.</p>
@param key the key to be partitioned.
@param value the entry value.
@param numPartitions the total number of partitions.
@return the partition number for the <code>key</code>.]]>
</doc>
</method>
<doc>
<![CDATA[Partitions the key space.
<p><code>Partitioner</code> controls the partitioning of the keys of the
intermediate map-outputs. The key (or a subset of the key) is used to derive
the partition, typically by a hash function. The total number of partitions
is the same as the number of reduce tasks for the job. Hence this controls
which of the <code>m</code> reduce tasks the intermediate key (and hence the
record) is sent for reduction.</p>
@see Reducer]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Partitioner -->
<!-- start class org.apache.hadoop.mapreduce.RecordReader -->
<class name="RecordReader" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="java.io.Closeable"/>
<constructor name="RecordReader"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="initialize"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at initialization.
@param split the split that defines the range of records to read
@param context the information about the task
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="nextKeyValue" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Read the next key, value pair.
@return true if a key/value pair was read
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getCurrentKey" return="java.lang.Object"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current key
@return the current key or null if there is no current key
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getCurrentValue" return="java.lang.Object"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current value.
@return the object that was read
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getProgress" return="float"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[The current progress of the record reader through its data.
@return a number between 0.0 and 1.0 that is the fraction of the data read
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="close"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Close the record reader.]]>
</doc>
</method>
<doc>
<![CDATA[The record reader breaks the data into key/value pairs for input to the
{@link Mapper}.
@param <KEYIN>
@param <VALUEIN>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.RecordReader -->
<!-- start class org.apache.hadoop.mapreduce.RecordWriter -->
<class name="RecordWriter" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="RecordWriter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="write"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Writes a key/value pair.
@param key the key to write.
@param value the value to write.
@throws IOException]]>
</doc>
</method>
<method name="close"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Close this <code>RecordWriter</code> to future operations.
@param context the context of the task
@throws IOException]]>
</doc>
</method>
<doc>
<![CDATA[<code>RecordWriter</code> writes the output &lt;key, value&gt; pairs
to an output file.
<p><code>RecordWriter</code> implementations write the job outputs to the
{@link FileSystem}.</p>
@see OutputFormat]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.RecordWriter -->
<!-- start class org.apache.hadoop.mapreduce.ReduceContext -->
<class name="ReduceContext" extends="org.apache.hadoop.mapreduce.TaskInputOutputContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="ReduceContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapred.RawKeyValueIterator, org.apache.hadoop.mapreduce.Counter, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.io.RawComparator, java.lang.Class, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<exception name="IOException" type="java.io.IOException"/>
</constructor>
<method name="nextKey" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Start processing next unique key.]]>
</doc>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Advance to the next key/value pair.]]>
</doc>
</method>
<method name="getCurrentKey" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getCurrentValue" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getValues" return="java.lang.Iterable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Iterate through the values for the current key, reusing the same value
object, which is stored in the context.
@return the series of values associated with the current key. All of the
objects returned directly and indirectly from this method are reused.]]>
</doc>
</method>
<field name="reporter" type="org.apache.hadoop.util.Progressable"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[The context passed to the {@link Reducer}.
@param <KEYIN> the class of the input keys
@param <VALUEIN> the class of the input values
@param <KEYOUT> the class of the output keys
@param <VALUEOUT> the class of the output values]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.ReduceContext -->
<!-- start class org.apache.hadoop.mapreduce.ReduceContext.ValueIterable -->
<class name="ReduceContext.ValueIterable" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="java.lang.Iterable"/>
<constructor name="ReduceContext.ValueIterable"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="iterator" return="java.util.Iterator"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.ReduceContext.ValueIterable -->
<!-- start class org.apache.hadoop.mapreduce.ReduceContext.ValueIterator -->
<class name="ReduceContext.ValueIterator" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<implements name="java.util.Iterator"/>
<constructor name="ReduceContext.ValueIterator"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</constructor>
<method name="hasNext" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="next" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="remove"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.ReduceContext.ValueIterator -->
<!-- start class org.apache.hadoop.mapreduce.Reducer -->
<class name="Reducer" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Reducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the start of the task.]]>
</doc>
</method>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[This method is called once for each key. Most applications will define
their reduce class by overriding this method. The default implementation
is an identity function.]]>
</doc>
</method>
<method name="cleanup"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Called once at the end of the task.]]>
</doc>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Advanced application writers can use the
{@link #run(org.apache.hadoop.mapreduce.Reducer.Context)} method to
control how the reduce task works.]]>
</doc>
</method>
<doc>
<![CDATA[Reduces a set of intermediate values which share a key to a smaller set of
values.
<p><code>Reducer</code> implementations
can access the {@link Configuration} for the job via the
{@link JobContext#getConfiguration()} method.</p>
<p><code>Reducer</code> has 3 primary phases:</p>
<ol>
<li>
<h4 id="Shuffle">Shuffle</h4>
<p>The <code>Reducer</code> copies the sorted output from each
{@link Mapper} using HTTP across the network.</p>
</li>
<li>
<h4 id="Sort">Sort</h4>
<p>The framework merge sorts <code>Reducer</code> inputs by
<code>key</code>s
(since different <code>Mapper</code>s may have output the same key).</p>
<p>The shuffle and sort phases occur simultaneously i.e. while outputs are
being fetched they are merged.</p>
<h5 id="SecondarySort">SecondarySort</h5>
<p>To achieve a secondary sort on the values returned by the value
iterator, the application should extend the key with the secondary
key and define a grouping comparator. The keys will be sorted using the
entire key, but will be grouped using the grouping comparator to decide
which keys and values are sent in the same call to reduce. The grouping
comparator is specified via
{@link Job#setGroupingComparatorClass(Class)}. The sort order is
controlled by
{@link Job#setSortComparatorClass(Class)}.</p>
For example, say that you want to find duplicate web pages and tag them
all with the url of the "best" known example. You would set up the job
like:
<ul>
<li>Map Input Key: url</li>
<li>Map Input Value: document</li>
<li>Map Output Key: document checksum, url pagerank</li>
<li>Map Output Value: url</li>
<li>Partitioner: by checksum</li>
<li>OutputKeyComparator: by checksum and then decreasing pagerank</li>
<li>OutputValueGroupingComparator: by checksum</li>
</ul>
</li>
<li>
<h4 id="Reduce">Reduce</h4>
<p>In this phase the
{@link #reduce(Object, Iterable, Context)}
method is called for each <code>&lt;key, (collection of values)&gt;</code> in
the sorted inputs.</p>
<p>The output of the reduce task is typically written to a
{@link RecordWriter} via
{@link Context#write(Object, Object)}.</p>
</li>
</ol>
<p>The output of the <code>Reducer</code> is <b>not re-sorted</b>.</p>
<p>Example:</p>
<p><blockquote><pre>
public class IntSumReducer<Key> extends Reducer<Key,IntWritable,
Key,IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Key key, Iterable<IntWritable> values,
Context context) throws IOException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
</pre></blockquote></p>
@see Mapper
@see Partitioner]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.Reducer -->
<!-- start class org.apache.hadoop.mapreduce.Reducer.Context -->
<class name="Reducer.Context" extends="org.apache.hadoop.mapreduce.ReduceContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="Reducer.Context" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapred.RawKeyValueIterator, org.apache.hadoop.mapreduce.Counter, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter, org.apache.hadoop.io.RawComparator, java.lang.Class, java.lang.Class"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</constructor>
</class>
<!-- end class org.apache.hadoop.mapreduce.Reducer.Context -->
<!-- start class org.apache.hadoop.mapreduce.StatusReporter -->
<class name="StatusReporter" extends="java.lang.Object"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="StatusReporter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="name" type="java.lang.Enum"/>
</method>
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="group" type="java.lang.String"/>
<param name="name" type="java.lang.String"/>
</method>
<method name="progress"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setStatus"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="java.lang.String"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.StatusReporter -->
<!-- start class org.apache.hadoop.mapreduce.TaskAttemptContext -->
<class name="TaskAttemptContext" extends="org.apache.hadoop.mapreduce.JobContext"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Progressable"/>
<constructor name="TaskAttemptContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getTaskAttemptID" return="org.apache.hadoop.mapreduce.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the unique name for this task attempt.]]>
</doc>
</method>
<method name="setStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="msg" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the current status of the task to the given string.]]>
</doc>
</method>
<method name="getStatus" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the last set status message.
@return the current status message]]>
</doc>
</method>
<method name="progress"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Report progress. The subtypes actually do work in this method.]]>
</doc>
</method>
<doc>
<![CDATA[The context for task attempts.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskAttemptContext -->
<!-- start class org.apache.hadoop.mapreduce.TaskAttemptID -->
<class name="TaskAttemptID" extends="org.apache.hadoop.mapred.ID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskAttemptID" type="org.apache.hadoop.mapreduce.TaskID, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given {@link TaskID}.
@param taskId TaskID that this task belongs to
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID" type="java.lang.String, int, boolean, int, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskAttemptID object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param taskId taskId number
@param id the task attempt number]]>
</doc>
</constructor>
<constructor name="TaskAttemptID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the {@link JobID} object that this task attempt belongs to]]>
</doc>
</method>
<method name="getTaskID" return="org.apache.hadoop.mapreduce.TaskID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the {@link TaskID} object that this task attempt belongs to]]>
</doc>
</method>
<method name="isMap" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns whether this TaskAttemptID is a map ID]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="appendTo" return="java.lang.StringBuilder"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="builder" type="java.lang.StringBuilder"/>
<doc>
<![CDATA[Add the unique string to the StringBuilder
@param builder the builder to append to
@return the builder that was passed in.]]>
</doc>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare TaskIds by first tipIds, then by task numbers.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="forName" return="org.apache.hadoop.mapreduce.TaskAttemptID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a TaskAttemptID object from given string
@return constructed TaskAttemptID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<field name="ATTEMPT" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[TaskAttemptID represents the immutable and unique identifier for
a task attempt. Each task attempt is one particular instance of a Map or
Reduce Task identified by its TaskID.
TaskAttemptID consists of 2 parts. First part is the
{@link TaskID}, that this TaskAttemptID belongs to.
Second part is the task attempt number. <br>
An example TaskAttemptID is :
<code>attempt_200707121733_0003_m_000005_0</code> , which represents the
zeroth task attempt for the fifth map task in the third job
running at the jobtracker started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskAttemptID strings,
but rather use appropriate constructors or the {@link #forName(String)}
method.
@see JobID
@see TaskID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskAttemptID -->
<!-- start class org.apache.hadoop.mapreduce.TaskID -->
<class name="TaskID" extends="org.apache.hadoop.mapred.ID"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TaskID" type="org.apache.hadoop.mapreduce.JobID, boolean, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskID object from given {@link JobID}.
@param jobId JobID that this tip belongs to
@param isMap whether the tip is a map
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID" type="java.lang.String, int, boolean, int"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a TaskInProgressId object from given parts.
@param jtIdentifier jobTracker identifier
@param jobId job number
@param isMap whether the tip is a map
@param id the tip number]]>
</doc>
</constructor>
<constructor name="TaskID"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getJobID" return="org.apache.hadoop.mapreduce.JobID"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns the {@link JobID} object that this tip belongs to]]>
</doc>
</method>
<method name="isMap" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Returns whether this TaskID is a map ID]]>
</doc>
</method>
<method name="equals" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="java.lang.Object"/>
</method>
<method name="compareTo" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="o" type="org.apache.hadoop.mapreduce.ID"/>
<doc>
<![CDATA[Compare TaskInProgressIds by first jobIds, then by tip numbers. Reduces are
defined as greater than maps.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="appendTo" return="java.lang.StringBuilder"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="builder" type="java.lang.StringBuilder"/>
<doc>
<![CDATA[Add the unique string to the given builder.
@param builder the builder to append to
@return the builder that was passed in]]>
</doc>
</method>
<method name="hashCode" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="forName" return="org.apache.hadoop.mapreduce.TaskID"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="str" type="java.lang.String"/>
<exception name="IllegalArgumentException" type="java.lang.IllegalArgumentException"/>
<doc>
<![CDATA[Construct a TaskID object from given string
@return constructed TaskID object or null if the given String is null
@throws IllegalArgumentException if the given string is malformed]]>
</doc>
</method>
<field name="TASK" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<field name="idFormat" type="java.text.NumberFormat"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[TaskID represents the immutable and unique identifier for
a Map or Reduce Task. Each TaskID encompasses multiple attempts made to
execute the Map or Reduce Task, each of which is uniquely identified by
their TaskAttemptID.
TaskID consists of 3 parts. First part is the {@link JobID}, that this
TaskInProgress belongs to. Second part of the TaskID is either 'm' or 'r'
representing whether the task is a map task or a reduce task.
And the third part is the task number. <br>
An example TaskID is :
<code>task_200707121733_0003_m_000005</code> , which represents the
fifth map task in the third job running at the jobtracker
started at <code>200707121733</code>.
<p>
Applications should never construct or parse TaskID strings,
but rather use appropriate constructors or the {@link #forName(String)}
method.
@see JobID
@see TaskAttemptID]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskID -->
<!-- start class org.apache.hadoop.mapreduce.TaskInputOutputContext -->
<class name="TaskInputOutputContext" extends="org.apache.hadoop.mapreduce.TaskAttemptContext"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Progressable"/>
<constructor name="TaskInputOutputContext" type="org.apache.hadoop.conf.Configuration, org.apache.hadoop.mapreduce.TaskAttemptID, org.apache.hadoop.mapreduce.RecordWriter, org.apache.hadoop.mapreduce.OutputCommitter, org.apache.hadoop.mapreduce.StatusReporter"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="nextKeyValue" return="boolean"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Advance to the next key, value pair.
@return true if a key/value pair was read, false if at end]]>
</doc>
</method>
<method name="getCurrentKey" return="java.lang.Object"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current key.
@return the current key object or null if there isn't one
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="getCurrentValue" return="java.lang.Object"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the current value.
@return the value object that was read into
@throws IOException
@throws InterruptedException]]>
</doc>
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Generate an output key/value pair.]]>
</doc>
</method>
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="counterName" type="java.lang.Enum"/>
</method>
<method name="getCounter" return="org.apache.hadoop.mapreduce.Counter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="groupName" type="java.lang.String"/>
<param name="counterName" type="java.lang.String"/>
</method>
<method name="progress"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="setStatus"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="status" type="java.lang.String"/>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<doc>
<![CDATA[A context object that allows input and output from the task. It is only
supplied to the {@link Mapper} or {@link Reducer}.
@param <KEYIN> the input key type for the task
@param <VALUEIN> the input value type for the task
@param <KEYOUT> the output key type for the task
@param <VALUEOUT> the output value type for the task]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.TaskInputOutputContext -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.input">
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
<class name="FileInputFormat" extends="org.apache.hadoop.mapreduce.InputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getFormatMinSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Get the lower bound on split size imposed by the format.
@return the number of bytes of the minimal split for this format]]>
</doc>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="filename" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Is the given filename splitable? Usually, true, but if the file is
stream compressed, it will not be.
<code>FileInputFormat</code> implementations can override this and return
<code>false</code> to ensure that individual input files are never split-up
so that {@link Mapper}s process entire files.
@param context the job context
@param filename the file name to check
@return is this file splitable?]]>
</doc>
</method>
<method name="setInputPathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="filter" type="java.lang.Class"/>
<doc>
<![CDATA[Set a PathFilter to be applied to the input paths for the map-reduce job.
@param job the job to modify
@param filter the PathFilter class to use for filtering the input paths.]]>
</doc>
</method>
<method name="setMinInputSplitSize"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="size" type="long"/>
<doc>
<![CDATA[Set the minimum input split size
@param job the job to modify
@param size the minimum size]]>
</doc>
</method>
<method name="getMinSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the minimum split size
@param job the job
@return the minimum number of bytes that can be in a split]]>
</doc>
</method>
<method name="setMaxInputSplitSize"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="size" type="long"/>
<doc>
<![CDATA[Set the maximum split size
@param job the job to modify
@param size the maximum split size]]>
</doc>
</method>
<method name="getMaxSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the maximum split size.
@param context the job to look at.
@return the maximum number of bytes a split can include]]>
</doc>
</method>
<method name="getInputPathFilter" return="org.apache.hadoop.fs.PathFilter"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get a PathFilter instance of the filter set for the input paths.
@return the PathFilter instance set for the job, NULL if none has been set.]]>
</doc>
</method>
<method name="listStatus" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[List input directories.
Subclasses may override to, e.g., select only files matching a regular
expression.
@param job the job to list input paths for
@return list of FileStatus objects
@throws IOException if zero items.]]>
</doc>
</method>
<method name="getSplits" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Generate the list of files and make them into FileSplits.]]>
</doc>
</method>
<method name="computeSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blockSize" type="long"/>
<param name="minSize" type="long"/>
<param name="maxSize" type="long"/>
</method>
<method name="getBlockIndex" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="blkLocations" type="org.apache.hadoop.fs.BlockLocation[]"/>
<param name="offset" type="long"/>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Sets the given comma separated paths as the list of inputs
for the map-reduce job.
@param job the job
@param commaSeparatedPaths Comma separated paths to be set as
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="commaSeparatedPaths" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add the given comma separated paths to the list of inputs for
the map-reduce job.
@param job The job to modify
@param commaSeparatedPaths Comma separated paths to be added to
the list of inputs for the map-reduce job.]]>
</doc>
</method>
<method name="setInputPaths"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="inputPaths" type="org.apache.hadoop.fs.Path[]"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Set the array of {@link Path}s as the list of inputs
for the map-reduce job.
@param job The job to modify
@param inputPaths the {@link Path}s of the input directories/files
for the map-reduce job.]]>
</doc>
</method>
<method name="addInputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="path" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Add a {@link Path} to the list of inputs for the map-reduce job.
@param job The {@link Job} to modify
@param path {@link Path} to be added to the list of inputs for
the map-reduce job.]]>
</doc>
</method>
<method name="getInputPaths" return="org.apache.hadoop.fs.Path[]"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the list of input {@link Path}s for the map-reduce job.
@param context The job
@return the list of input {@link Path}s for the map-reduce job.]]>
</doc>
</method>
<doc>
<![CDATA[A base class for file-based {@link InputFormat}s.
<p><code>FileInputFormat</code> is the base class for all file-based
<code>InputFormat</code>s. This provides a generic implementation of
{@link #getSplits(JobContext)}.
Subclasses of <code>FileInputFormat</code> can also override the
{@link #isSplitable(JobContext, Path)} method to ensure input-files are
not split-up and are processed as a whole by {@link Mapper}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
<class name="FileSplit" extends="org.apache.hadoop.mapreduce.InputSplit"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.io.Writable"/>
<constructor name="FileSplit" type="org.apache.hadoop.fs.Path, long, long, java.lang.String[]"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Constructs a split with host information
@param file the file name
@param start the position of the first byte in the file to process
@param length the number of bytes in the file to process
@param hosts the list of hosts containing the block, possibly null]]>
</doc>
</constructor>
<method name="getPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The file containing this split's data.]]>
</doc>
</method>
<method name="getStart" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The position of the first byte in the file to process.]]>
</doc>
</method>
<method name="getLength" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[The number of bytes in the file to process.]]>
</doc>
</method>
<method name="toString" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="write"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="out" type="java.io.DataOutput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="readFields"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="in" type="java.io.DataInput"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getLocations" return="java.lang.String[]"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A section of an input file. Returned by {@link
InputFormat#getSplits(JobContext)} and passed to
{@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.FileSplit -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
<class name="InvalidInputException" extends="java.io.IOException"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InvalidInputException" type="java.util.List"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Create the exception with the given list.
@param probs the list of problems to report. This list is not copied.]]>
</doc>
</constructor>
<method name="getProblems" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the complete list of the problems reported.
@return the list of problems, which must not be modified]]>
</doc>
</method>
<method name="getMessage" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get a summary message of the problems found.
@return the concatenated messages from all of the problems.]]>
</doc>
</method>
<doc>
<![CDATA[This class wraps a list of problems with the input, so that the user
can get a list of problems together instead of finding and fixing them one
by one.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.InvalidInputException -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.LineRecordReader -->
<class name="LineRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="LineRecordReader"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="genericSplit" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getCurrentKey" return="org.apache.hadoop.io.LongWritable"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getCurrentValue" return="org.apache.hadoop.io.Text"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Get the progress within the split]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[Treats keys as offset in file and value as line.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.LineRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
<class name="SequenceFileInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="getFormatMinSplitSize" return="long"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</method>
<method name="listStatus" return="java.util.List"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
<class name="SequenceFileRecordReader" extends="org.apache.hadoop.mapreduce.RecordReader"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileRecordReader"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="initialize"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="nextKeyValue" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getCurrentKey" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getCurrentValue" return="java.lang.Object"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="getProgress" return="float"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Return the progress within the input split
@return 0.0 to 1.0 of the input byte range]]>
</doc>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="conf" type="org.apache.hadoop.conf.Configuration"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A {@link RecordReader} for {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.SequenceFileRecordReader -->
<!-- start class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
<class name="TextInputFormat" extends="org.apache.hadoop.mapreduce.lib.input.FileInputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TextInputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="createRecordReader" return="org.apache.hadoop.mapreduce.RecordReader"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="split" type="org.apache.hadoop.mapreduce.InputSplit"/>
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
</method>
<method name="isSplitable" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="file" type="org.apache.hadoop.fs.Path"/>
</method>
<doc>
<![CDATA[An {@link InputFormat} for plain text files. Files are broken into lines.
Either linefeed or carriage-return is used to signal end of line. Keys are
the position in the file, and values are the line of text.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.input.TextInputFormat -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.map">
<!-- start class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
<class name="InverseMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="InverseMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[The inverse function. Input keys and values are swapped.]]>
</doc>
</method>
<doc>
<![CDATA[A {@link Mapper} that swaps keys and values.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.InverseMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
<class name="MultithreadedMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="MultithreadedMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getNumberOfThreads" return="int"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[The number of threads in the thread pool that will run the map function.
@param job the job
@return the number of threads]]>
</doc>
</method>
<method name="setNumberOfThreads"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="threads" type="int"/>
<doc>
<![CDATA[Set the number of threads in the pool for running maps.
@param job the job to modify
@param threads the new number of threads]]>
</doc>
</method>
<method name="getMapperClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the application's mapper class.
@param <K1> the map's input key type
@param <V1> the map's input value type
@param <K2> the map's output key type
@param <V2> the map's output value type
@param job the job
@return the mapper class to run]]>
</doc>
</method>
<method name="setMapperClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="cls" type="java.lang.Class"/>
<doc>
<![CDATA[Set the application's mapper class.
@param <K1> the map input key type
@param <V1> the map input value type
@param <K2> the map output key type
@param <V2> the map output value type
@param job the job to modify
@param cls the class to use as the mapper]]>
</doc>
</method>
<method name="run"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Run the application's maps using a thread pool.]]>
</doc>
</method>
<doc>
<![CDATA[Multithreaded implementation for {@link org.apache.hadoop.mapreduce.Mapper}.
<p>
It can be used instead of the default implementation,
{@link org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU
bound in order to improve throughput.
<p>
Mapper implementations using this MapRunnable must be thread-safe.
<p>
The Map-Reduce job has to be configured with the mapper to use via
{@link #setMapperClass(Job, Class)} and
the number of threads the thread-pool can use with the
{@link #getNumberOfThreads(JobContext)} method. The default
value is 10 threads.
<p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper -->
<!-- start class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
<class name="TokenCounterMapper" extends="org.apache.hadoop.mapreduce.Mapper"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TokenCounterMapper"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="map"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="context" type="org.apache.hadoop.mapreduce.Mapper.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<doc>
<![CDATA[Tokenize the input values and emit each word with a count of 1.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.output">
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
<class name="FileOutputCommitter" extends="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileOutputCommitter" type="org.apache.hadoop.fs.Path, org.apache.hadoop.mapreduce.TaskAttemptContext"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create a file output committer
@param outputPath the job's output path
@param context the task's context
@throws IOException]]>
</doc>
</constructor>
<method name="setupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Create the temporary directory that is the root of all of the task
work directories.
@param context the job's context]]>
</doc>
</method>
<method name="cleanupJob"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Delete the temporary directory, including all of the work directories.
@param context the job's context]]>
</doc>
</method>
<method name="setupTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[No task setup required.]]>
</doc>
</method>
<method name="commitTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Move the files from the work directory to the job output directory
@param context the task context]]>
</doc>
</method>
<method name="abortTask"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<doc>
<![CDATA[Delete the work directory]]>
</doc>
</method>
<method name="needsTaskCommit" return="boolean"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Did this task write any files in the work directory?
@param context the task's context]]>
</doc>
</method>
<method name="getWorkPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the directory that the task should write results into
@return the work directory
@throws IOException]]>
</doc>
</method>
<field name="TEMP_DIR_NAME" type="java.lang.String"
transient="false" volatile="false"
static="true" final="true" visibility="protected"
deprecated="not deprecated">
<doc>
<![CDATA[Temporary directory name]]>
</doc>
</field>
<doc>
<![CDATA[An {@link OutputCommitter} that commits files specified
in job output directory i.e. ${mapred.output.dir}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
<class name="FileOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
abstract="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="FileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setCompressOutput"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="compress" type="boolean"/>
<doc>
<![CDATA[Set whether the output of the job is compressed.
@param job the job to modify
@param compress should the output of the job be compressed?]]>
</doc>
</method>
<method name="getCompressOutput" return="boolean"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Is the job output compressed?
@param job the Job to look in
@return <code>true</code> if the job output should be compressed,
<code>false</code> otherwise]]>
</doc>
</method>
<method name="setOutputCompressorClass"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="codecClass" type="java.lang.Class"/>
<doc>
<![CDATA[Set the {@link CompressionCodec} to be used to compress job outputs.
@param job the job to modify
@param codecClass the {@link CompressionCodec} to be used to
compress the job outputs]]>
</doc>
</method>
<method name="getOutputCompressorClass" return="java.lang.Class"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<param name="defaultValue" type="java.lang.Class"/>
<doc>
<![CDATA[Get the {@link CompressionCodec} for compressing the job outputs.
@param job the {@link Job} to look in
@param defaultValue the {@link CompressionCodec} to return if not set
@return the {@link CompressionCodec} to be used to compress the
job outputs
@throws IllegalArgumentException if the class was specified, but not found]]>
</doc>
</method>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="true" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<exception name="FileAlreadyExistsException" type="org.apache.hadoop.mapred.FileAlreadyExistsException"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="setOutputPath"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="outputDir" type="org.apache.hadoop.fs.Path"/>
<doc>
<![CDATA[Set the {@link Path} of the output directory for the map-reduce job.
@param job The job to modify
@param outputDir the {@link Path} of the output directory for
the map-reduce job.]]>
</doc>
</method>
<method name="getOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the {@link Path} to the output directory for the map-reduce job.
@return the {@link Path} to the output directory for the map-reduce job.
@see FileOutputFormat#getWorkOutputPath(TaskInputOutputContext)]]>
</doc>
</method>
<method name="getWorkOutputPath" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Get the {@link Path} to the task's temporary output directory
for the map-reduce job
<h4 id="SideEffectFiles">Tasks' Side-Effect Files</h4>
<p>Some applications need to create/write-to side-files, which differ from
the actual job-outputs.
<p>In such cases there could be issues with 2 instances of the same TIP
(running simultaneously e.g. speculative tasks) trying to open/write-to the
same file (path) on HDFS. Hence the application-writer will have to pick
unique names per task-attempt (e.g. using the attemptid, say
<tt>attempt_200709221812_0001_m_000000_0</tt>), not just per TIP.</p>
<p>To get around this the Map-Reduce framework helps the application-writer
out by maintaining a special
<tt>${mapred.output.dir}/_temporary/_${taskid}</tt>
sub-directory for each task-attempt on HDFS where the output of the
task-attempt goes. On successful completion of the task-attempt the files
in the <tt>${mapred.output.dir}/_temporary/_${taskid}</tt> (only)
are <i>promoted</i> to <tt>${mapred.output.dir}</tt>. Of course, the
framework discards the sub-directory of unsuccessful task-attempts. This
is completely transparent to the application.</p>
<p>The application-writer can take advantage of this by creating any
side-files required in a work directory during execution
of their task i.e. via
{@link #getWorkOutputPath(TaskInputOutputContext)}, and
the framework will move them out similarly - thus they don't have to pick
unique paths per task-attempt.</p>
<p>The entire discussion holds true for maps of jobs with
reducer=NONE (i.e. 0 reduces) since output of the map, in that case,
goes directly to HDFS.</p>
@return the {@link Path} to the task's temporary output directory
for the map-reduce job.]]>
</doc>
</method>
<method name="getPathForWorkFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskInputOutputContext"/>
<param name="name" type="java.lang.String"/>
<param name="extension" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
<doc>
<![CDATA[Helper function to generate a {@link Path} for a file that is unique for
the task within the job output directory.
<p>The path can be used to create custom files from within the map and
reduce tasks. The path name will be unique for each task. The path parent
will be the job output directory.</p>
<p>This method uses the {@link #getUniqueFile} method to make the file name
unique for the task.</p>
@param context the context for the task.
@param name the name for the file.
@param extension the extension for the file
@return a unique path across all tasks of the job.]]>
</doc>
</method>
<method name="getUniqueFile" return="java.lang.String"
abstract="false" native="false" synchronized="true"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<param name="name" type="java.lang.String"/>
<param name="extension" type="java.lang.String"/>
<doc>
<![CDATA[Generate a unique filename, based on the task id, name, and extension
@param context the task that is calling this
@param name the base filename
@param extension the filename extension
@return a string like $name-[mr]-$id$extension]]>
</doc>
</method>
<method name="getDefaultWorkFile" return="org.apache.hadoop.fs.Path"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<param name="extension" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[Get the default path and filename for the output format.
@param context the task context
@param extension an extension to add to the filename
@return a full path $output/_temporary/$taskid/part-[mr]-$id
@throws IOException]]>
</doc>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<doc>
<![CDATA[A base class for {@link OutputFormat}s that write to {@link FileSystem}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.FileOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
<class name="NullOutputFormat" extends="org.apache.hadoop.mapreduce.OutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="NullOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
</method>
<method name="checkOutputSpecs"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.JobContext"/>
</method>
<method name="getOutputCommitter" return="org.apache.hadoop.mapreduce.OutputCommitter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
</method>
<doc>
<![CDATA[Consume all outputs and put them in /dev/null.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.NullOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
<class name="SequenceFileOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="SequenceFileOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<method name="getOutputCompressionType" return="org.apache.hadoop.io.SequenceFile.CompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.JobContext"/>
<doc>
<![CDATA[Get the {@link CompressionType} for the output {@link SequenceFile}.
@param job the {@link Job}
@return the {@link CompressionType} for the output {@link SequenceFile},
defaulting to {@link CompressionType#RECORD}]]>
</doc>
</method>
<method name="setOutputCompressionType"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.Job"/>
<param name="style" type="org.apache.hadoop.io.SequenceFile.CompressionType"/>
<doc>
<![CDATA[Set the {@link CompressionType} for the output {@link SequenceFile}.
@param job the {@link Job} to modify
@param style the {@link CompressionType} for the output
{@link SequenceFile}]]>
</doc>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes {@link SequenceFile}s.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
<class name="TextOutputFormat" extends="org.apache.hadoop.mapreduce.lib.output.FileOutputFormat"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="TextOutputFormat"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getRecordWriter" return="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
<doc>
<![CDATA[An {@link OutputFormat} that writes plain text files.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat -->
<!-- start class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter -->
<class name="TextOutputFormat.LineRecordWriter" extends="org.apache.hadoop.mapreduce.RecordWriter"
abstract="false"
static="true" final="false" visibility="protected"
deprecated="not deprecated">
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream, java.lang.String"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<constructor name="TextOutputFormat.LineRecordWriter" type="java.io.DataOutputStream"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="write"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="close"
abstract="false" native="false" synchronized="true"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="context" type="org.apache.hadoop.mapreduce.TaskAttemptContext"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<field name="out" type="java.io.DataOutputStream"
transient="false" volatile="false"
static="false" final="false" visibility="protected"
deprecated="not deprecated">
</field>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.LineRecordWriter -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.partition">
<!-- start class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
<class name="HashPartitioner" extends="org.apache.hadoop.mapreduce.Partitioner"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="HashPartitioner"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="getPartition" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="value" type="java.lang.Object"/>
<param name="numReduceTasks" type="int"/>
<doc>
<![CDATA[Use {@link Object#hashCode()} to partition.]]>
</doc>
</method>
<doc>
<![CDATA[Partition keys by their {@link Object#hashCode()}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner -->
</package>
<package name="org.apache.hadoop.mapreduce.lib.reduce">
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
<class name="IntSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="IntSumReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer -->
<!-- start class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
<class name="LongSumReducer" extends="org.apache.hadoop.mapreduce.Reducer"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<constructor name="LongSumReducer"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="reduce"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="key" type="java.lang.Object"/>
<param name="values" type="java.lang.Iterable"/>
<param name="context" type="org.apache.hadoop.mapreduce.Reducer.Context"/>
<exception name="IOException" type="java.io.IOException"/>
<exception name="InterruptedException" type="java.lang.InterruptedException"/>
</method>
</class>
<!-- end class org.apache.hadoop.mapreduce.lib.reduce.LongSumReducer -->
</package>
<package name="org.apache.hadoop.tools">
<!-- start class org.apache.hadoop.tools.DistCh -->
<class name="DistCh" extends="org.apache.hadoop.tools.DistTool"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<doc>
<![CDATA[This is the main driver for recursively changing file properties.]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<doc>
<![CDATA[A Map-reduce program to recursively change file properties
such as owner, group and permission.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.tools.DistCh -->
<!-- start class org.apache.hadoop.tools.DistCp -->
<class name="DistCp" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="DistCp" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="copy"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
<param name="srcPath" type="java.lang.String"/>
<param name="destPath" type="java.lang.String"/>
<param name="logPath" type="org.apache.hadoop.fs.Path"/>
<param name="srcAsList" type="boolean"/>
<param name="ignoreReadFailures" type="boolean"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<doc>
<![CDATA[This is the main driver for recursively copying directories
across file systems. It takes at least two cmdline parameters. A source
URL and a destination URL. It then essentially does an "ls -lR" on the
source URL, and writes the output in a round-robin manner to all the map
input files. The mapper actually copies the files allotted to it. The
reduce is empty.]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
</method>
<method name="getRandomId" return="java.lang.String"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
</method>
<field name="LOG" type="org.apache.commons.logging.Log"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
</field>
<doc>
<![CDATA[A Map-reduce program to recursively copy directories between
different file-systems.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.tools.DistCp -->
<!-- start class org.apache.hadoop.tools.DistCp.DuplicationException -->
<class name="DistCp.DuplicationException" extends="java.io.IOException"
abstract="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<field name="ERROR_CODE" type="int"
transient="false" volatile="false"
static="true" final="true" visibility="public"
deprecated="not deprecated">
<doc>
<![CDATA[Error code for this exception]]>
</doc>
</field>
<doc>
<![CDATA[An exception class for duplicated source files.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.tools.DistCp.DuplicationException -->
<!-- start class org.apache.hadoop.tools.HadoopArchives -->
<class name="HadoopArchives" extends="java.lang.Object"
abstract="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<implements name="org.apache.hadoop.util.Tool"/>
<constructor name="HadoopArchives" type="org.apache.hadoop.conf.Configuration"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</constructor>
<method name="setConf"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<method name="getConf" return="org.apache.hadoop.conf.Configuration"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
</method>
<method name="archive"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="srcPaths" type="java.util.List"/>
<param name="archiveName" type="java.lang.String"/>
<param name="dest" type="org.apache.hadoop.fs.Path"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[archive the given source paths into
the dest
@param srcPaths the src paths to be archived
@param dest the dest dir that will contain the archive]]>
</doc>
</method>
<method name="run" return="int"
abstract="false" native="false" synchronized="false"
static="false" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<exception name="Exception" type="java.lang.Exception"/>
<doc>
<![CDATA[the main driver for creating the archives
it takes at least two command line parameters. The src and the
dest. It does an lsr on the source paths.
The mapper creates archives and the reducer creates
the archive index.]]>
</doc>
</method>
<method name="main"
abstract="false" native="false" synchronized="false"
static="true" final="false" visibility="public"
deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
<doc>
<![CDATA[the main functions]]>
</doc>
</method>
<doc>
<![CDATA[An archive creation utility.
This class provides methods that can be used
to create hadoop archives. For understanding of
Hadoop archives look at {@link HarFileSystem}.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.tools.HadoopArchives -->
<!-- start class org.apache.hadoop.tools.Logalyzer -->
<class name="Logalyzer"
       extends="java.lang.Object"
       abstract="false"
       static="false"
       final="false"
       visibility="public"
       deprecated="not deprecated">
<!-- API record for the public class org.apache.hadoop.tools.Logalyzer:
     one constructor, the doArchive and doAnalyze instance methods, and a
     static main method. Text inside each doc element is the Javadoc
     captured from the Java source. -->
<!-- Public no-argument constructor. -->
<constructor name="Logalyzer"
             static="false"
             final="false"
             visibility="public"
             deprecated="not deprecated">
</constructor>
<!-- doArchive(String, String); declared to throw java.io.IOException. -->
<method name="doArchive"
        abstract="false"
        native="false"
        synchronized="false"
        static="false"
        final="false"
        visibility="public"
        deprecated="not deprecated">
<param name="logListURI" type="java.lang.String"/>
<param name="archiveDirectory" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[doArchive: Workhorse function to archive log-files.
@param logListURI : The uri which will serve list of log-files to archive.
@param archiveDirectory : The directory to store archived logfiles.
@throws IOException]]>
</doc>
</method>
<!-- doAnalyze takes five String parameters; declared to throw
     java.io.IOException. -->
<method name="doAnalyze"
        abstract="false"
        native="false"
        synchronized="false"
        static="false"
        final="false"
        visibility="public"
        deprecated="not deprecated">
<param name="inputFilesDirectory" type="java.lang.String"/>
<param name="outputDirectory" type="java.lang.String"/>
<param name="grepPattern" type="java.lang.String"/>
<param name="sortColumns" type="java.lang.String"/>
<param name="columnSeparator" type="java.lang.String"/>
<exception name="IOException" type="java.io.IOException"/>
<doc>
<![CDATA[doAnalyze:
@param inputFilesDirectory : Directory containing the files to be analyzed.
@param outputDirectory : Directory to store analysis (output).
@param grepPattern : Pattern to *grep* for.
@param sortColumns : Sort specification for output.
@param columnSeparator : Column separator.
@throws IOException]]>
</doc>
</method>
<!-- Static main method taking a String[] argument list; no Javadoc was
     recorded for it. -->
<method name="main"
        abstract="false"
        native="false"
        synchronized="false"
        static="true"
        final="false"
        visibility="public"
        deprecated="not deprecated">
<param name="args" type="java.lang.String[]"/>
</method>
<!-- Class-level Javadoc. The angle-bracket placeholders on the usage line
     are literal text inside the CDATA section, not markup. -->
<doc>
<![CDATA[Logalyzer: A utility tool for archiving and analyzing hadoop logs.
<p>
This tool supports archiving and analyzing (sort/grep) of log-files.
It takes as input
a) Input uri which will serve uris of the logs to be archived.
b) Output directory (not mandatory).
c) Directory on dfs to archive the logs.
d) The sort/grep patterns for analyzing the files and separator for boundaries.
Usage:
Logalyzer -archive -archiveDir <directory to archive logs> -analysis <directory> -logs <log-list uri> -grep <pattern> -sort <col1, col2> -separator <separator>
<p>]]>
</doc>
</class>
<!-- end class org.apache.hadoop.tools.Logalyzer -->
<!-- start class org.apache.hadoop.tools.Logalyzer.LogComparator -->
<class name="Logalyzer.LogComparator"
       extends="org.apache.hadoop.io.Text.Comparator"
       abstract="false"
       static="true"
       final="false"
       visibility="public"
       deprecated="not deprecated">
<!-- API record for the public static nested class Logalyzer.LogComparator,
     which extends org.apache.hadoop.io.Text.Comparator. -->
<!-- Interface listed as implemented by this class. -->
<implements name="org.apache.hadoop.conf.Configurable"/>
<!-- Public no-argument constructor. -->
<constructor name="Logalyzer.LogComparator"
             static="false"
             final="false"
             visibility="public"
             deprecated="not deprecated">
</constructor>
<!-- setConf(Configuration); no return type is recorded. -->
<method name="setConf"
        abstract="false"
        native="false"
        synchronized="false"
        static="false"
        final="false"
        visibility="public"
        deprecated="not deprecated">
<param name="conf" type="org.apache.hadoop.conf.Configuration"/>
</method>
<!-- getConf() returning a Configuration. -->
<method name="getConf"
        return="org.apache.hadoop.conf.Configuration"
        abstract="false"
        native="false"
        synchronized="false"
        static="false"
        final="false"
        visibility="public"
        deprecated="not deprecated">
</method>
<!-- compare over two (byte[], int, int) argument triples, returning int.
     NOTE(review): the names suggest buffer, start offset and length for each
     side; confirm against the Java source. -->
<method name="compare"
        return="int"
        abstract="false"
        native="false"
        synchronized="false"
        static="false"
        final="false"
        visibility="public"
        deprecated="not deprecated">
<param name="b1" type="byte[]"/>
<param name="s1" type="int"/>
<param name="l1" type="int"/>
<param name="b2" type="byte[]"/>
<param name="s2" type="int"/>
<param name="l2" type="int"/>
</method>
<!-- Class-level Javadoc captured from the source. -->
<doc>
<![CDATA[A WritableComparator optimized for UTF8 keys of the logs.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.tools.Logalyzer.LogComparator -->
<!-- start class org.apache.hadoop.tools.Logalyzer.LogRegexMapper -->
<class name="Logalyzer.LogRegexMapper"
       extends="org.apache.hadoop.mapred.MapReduceBase"
       abstract="false"
       static="true"
       final="false"
       visibility="public"
       deprecated="not deprecated">
<!-- API record for the public static nested class Logalyzer.LogRegexMapper,
     which extends org.apache.hadoop.mapred.MapReduceBase. -->
<!-- Interface listed as implemented by this class. -->
<implements name="org.apache.hadoop.mapred.Mapper"/>
<!-- Public no-argument constructor. -->
<constructor name="Logalyzer.LogRegexMapper"
             static="false"
             final="false"
             visibility="public"
             deprecated="not deprecated">
</constructor>
<!-- configure(JobConf); no return type is recorded. -->
<method name="configure"
        abstract="false"
        native="false"
        synchronized="false"
        static="false"
        final="false"
        visibility="public"
        deprecated="not deprecated">
<param name="job" type="org.apache.hadoop.mapred.JobConf"/>
</method>
<!-- map(key, value, output, reporter); declared to throw
     java.io.IOException. -->
<method name="map"
        abstract="false"
        native="false"
        synchronized="false"
        static="false"
        final="false"
        visibility="public"
        deprecated="not deprecated">
<param name="key" type="org.apache.hadoop.io.WritableComparable"/>
<param name="value" type="org.apache.hadoop.io.Text"/>
<param name="output" type="org.apache.hadoop.mapred.OutputCollector"/>
<param name="reporter" type="org.apache.hadoop.mapred.Reporter"/>
<exception name="IOException" type="java.io.IOException"/>
</method>
<!-- Class-level Javadoc captured from the source. -->
<doc>
<![CDATA[A {@link Mapper} that extracts text matching a regular expression.]]>
</doc>
</class>
<!-- end class org.apache.hadoop.tools.Logalyzer.LogRegexMapper -->
</package>
</api>