MAPREDUCE-5762. Port MAPREDUCE-3223 (Remove MRv1 config from mapred-default.xml) to branch-2. (aajisaka)
This commit is contained in:
parent
f0aa2a7466
commit
853cc04668
|
@ -42,6 +42,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
CompletedJob#loadFullHistoryData for code optimization.
|
CompletedJob#loadFullHistoryData for code optimization.
|
||||||
(zxu via rkanter)
|
(zxu via rkanter)
|
||||||
|
|
||||||
|
MAPREDUCE-5762. Port MAPREDUCE-3223 (Remove MRv1 config from
|
||||||
|
mapred-default.xml) to branch-2. (aajisaka)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
|
|
@ -23,47 +23,6 @@
|
||||||
|
|
||||||
<configuration>
|
<configuration>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.jobhistory.location</name>
|
|
||||||
<value></value>
|
|
||||||
<description> If job tracker is static the history files are stored
|
|
||||||
in this single well known place. If No value is set here, by default,
|
|
||||||
it is in the local file system at ${hadoop.log.dir}/history.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.jobhistory.task.numberprogresssplits</name>
|
|
||||||
<value>12</value>
|
|
||||||
<description> Every task attempt progresses from 0.0 to 1.0 [unless
|
|
||||||
it fails or is killed]. We record, for each task attempt, certain
|
|
||||||
statistics over each twelfth of the progress range. You can change
|
|
||||||
the number of intervals we divide the entire range of progress into
|
|
||||||
by setting this property. Higher values give more precision to the
|
|
||||||
recorded data, but costs more memory in the job tracker at runtime.
|
|
||||||
Each increment in this attribute costs 16 bytes per running task.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.userhistorylocation</name>
|
|
||||||
<value></value>
|
|
||||||
<description> User can specify a location to store the history files of
|
|
||||||
a particular job. If nothing is specified, the logs are stored in
|
|
||||||
output directory. The files are stored in "_logs/history/" in the directory.
|
|
||||||
User can stop logging by giving the value "none".
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.jobhistory.completed.location</name>
|
|
||||||
<value></value>
|
|
||||||
<description> The completed job history files are stored at this single well
|
|
||||||
known location. If nothing is specified, the files are stored at
|
|
||||||
${mapreduce.jobtracker.jobhistory.location}/done.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.committer.setup.cleanup.needed</name>
|
<name>mapreduce.job.committer.setup.cleanup.needed</name>
|
||||||
<value>true</value>
|
<value>true</value>
|
||||||
|
@ -98,15 +57,6 @@
|
||||||
set to less than .5</description>
|
set to less than .5</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.address</name>
|
|
||||||
<value>local</value>
|
|
||||||
<description>The host and port that the MapReduce job tracker runs
|
|
||||||
at. If "local", then jobs are run in-process as a single map
|
|
||||||
and reduce task.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.local.clientfactory.class.name</name>
|
<name>mapreduce.local.clientfactory.class.name</name>
|
||||||
<value>org.apache.hadoop.mapred.LocalClientFactory</value>
|
<value>org.apache.hadoop.mapred.LocalClientFactory</value>
|
||||||
|
@ -114,139 +64,11 @@
|
||||||
creating local job runner client</description>
|
creating local job runner client</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.http.address</name>
|
|
||||||
<value>0.0.0.0:50030</value>
|
|
||||||
<description>
|
|
||||||
The job tracker http server address and port the server will listen on.
|
|
||||||
If the port is 0 then the server will start on a free port.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.handler.count</name>
|
|
||||||
<value>10</value>
|
|
||||||
<description>
|
|
||||||
The number of server threads for the JobTracker. This should be roughly
|
|
||||||
4% of the number of tasktracker nodes.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.report.address</name>
|
|
||||||
<value>127.0.0.1:0</value>
|
|
||||||
<description>The interface and port that task tracker server listens on.
|
|
||||||
Since it is only connected to by the tasks, it uses the local interface.
|
|
||||||
EXPERT ONLY. Should only be changed if your host does not have the loopback
|
|
||||||
interface.</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.cluster.local.dir</name>
|
|
||||||
<value>${hadoop.tmp.dir}/mapred/local</value>
|
|
||||||
<description>The local directory where MapReduce stores intermediate
|
|
||||||
data files. May be a comma-separated list of
|
|
||||||
directories on different devices in order to spread disk i/o.
|
|
||||||
Directories that do not exist are ignored.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.system.dir</name>
|
|
||||||
<value>${hadoop.tmp.dir}/mapred/system</value>
|
|
||||||
<description>The directory where MapReduce stores control files.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.staging.root.dir</name>
|
|
||||||
<value>${hadoop.tmp.dir}/mapred/staging</value>
|
|
||||||
<description>The root of the staging area for users' job files
|
|
||||||
In practice, this should be the directory where users' home
|
|
||||||
directories are located (usually /user)
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.cluster.temp.dir</name>
|
|
||||||
<value>${hadoop.tmp.dir}/mapred/temp</value>
|
|
||||||
<description>A shared directory for temporary files.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.local.dir.minspacestart</name>
|
|
||||||
<value>0</value>
|
|
||||||
<description>If the space in mapreduce.cluster.local.dir drops under this,
|
|
||||||
do not ask for more tasks.
|
|
||||||
Value in bytes.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.local.dir.minspacekill</name>
|
|
||||||
<value>0</value>
|
|
||||||
<description>If the space in mapreduce.cluster.local.dir drops under this,
|
|
||||||
do not ask more tasks until all the current ones have finished and
|
|
||||||
cleaned up. Also, to save the rest of the tasks we have running,
|
|
||||||
kill one of them, to clean up some space. Start with the reduce tasks,
|
|
||||||
then go with the ones that have finished the least.
|
|
||||||
Value in bytes.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.expire.trackers.interval</name>
|
|
||||||
<value>600000</value>
|
|
||||||
<description>Expert: The time-interval, in milliseconds, after which
|
|
||||||
a tasktracker is declared 'lost' if it doesn't send heartbeats.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.instrumentation</name>
|
|
||||||
<value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value>
|
|
||||||
<description>Expert: The instrumentation class to associate with each TaskTracker.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.resourcecalculatorplugin</name>
|
|
||||||
<value></value>
|
|
||||||
<description>
|
|
||||||
Name of the class whose instance will be used to query resource information
|
|
||||||
on the tasktracker.
|
|
||||||
|
|
||||||
The class must be an instance of
|
|
||||||
org.apache.hadoop.util.ResourceCalculatorPlugin. If the value is null, the
|
|
||||||
tasktracker attempts to use a class appropriate to the platform.
|
|
||||||
Currently, the only platform supported is Linux.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.taskmemorymanager.monitoringinterval</name>
|
|
||||||
<value>5000</value>
|
|
||||||
<description>The interval, in milliseconds, for which the tasktracker waits
|
|
||||||
between two cycles of monitoring its tasks' memory usage. Used only if
|
|
||||||
tasks' memory management is enabled via mapred.tasktracker.tasks.maxmemory.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.tasks.sleeptimebeforesigkill</name>
|
|
||||||
<value>5000</value>
|
|
||||||
<description>The time, in milliseconds, the tasktracker waits for sending a
|
|
||||||
SIGKILL to a task, after it has been sent a SIGTERM. This is currently
|
|
||||||
not used on WINDOWS where tasks are just sent a SIGTERM.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.maps</name>
|
<name>mapreduce.job.maps</name>
|
||||||
<value>2</value>
|
<value>2</value>
|
||||||
<description>The default number of map tasks per job.
|
<description>The default number of map tasks per job.
|
||||||
Ignored when mapreduce.jobtracker.address is "local".
|
Ignored when mapreduce.framework.name is "local".
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
@ -256,34 +78,10 @@
|
||||||
<description>The default number of reduce tasks per job. Typically set to 99%
|
<description>The default number of reduce tasks per job. Typically set to 99%
|
||||||
of the cluster's reduce capacity, so that if a node fails the reduces can
|
of the cluster's reduce capacity, so that if a node fails the reduces can
|
||||||
still be executed in a single wave.
|
still be executed in a single wave.
|
||||||
Ignored when mapreduce.jobtracker.address is "local".
|
Ignored when mapreduce.framework.name is "local".
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.restart.recover</name>
|
|
||||||
<value>false</value>
|
|
||||||
<description>"true" to enable (job) recovery upon restart,
|
|
||||||
"false" to start afresh
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.jobhistory.block.size</name>
|
|
||||||
<value>3145728</value>
|
|
||||||
<description>The block size of the job history file. Since the job recovery
|
|
||||||
uses job history, it's important to dump job history to disk as
|
|
||||||
soon as possible. Note that this is an expert level parameter.
|
|
||||||
The default value is set to 3 MB.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.taskscheduler</name>
|
|
||||||
<value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value>
|
|
||||||
<description>The class responsible for scheduling the tasks.</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.running.map.limit</name>
|
<name>mapreduce.job.running.map.limit</name>
|
||||||
<value>0</value>
|
<value>0</value>
|
||||||
|
@ -322,20 +120,12 @@
|
||||||
<name>mapreduce.job.split.metainfo.maxsize</name>
|
<name>mapreduce.job.split.metainfo.maxsize</name>
|
||||||
<value>10000000</value>
|
<value>10000000</value>
|
||||||
<description>The maximum permissible size of the split metainfo file.
|
<description>The maximum permissible size of the split metainfo file.
|
||||||
The JobTracker won't attempt to read split metainfo files bigger than
|
The MapReduce ApplicationMaster won't attempt to read submitted split metainfo
|
||||||
the configured value.
|
files bigger than this configured value.
|
||||||
No limits if set to -1.
|
No limits if set to -1.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.taskscheduler.maxrunningtasks.perjob</name>
|
|
||||||
<value></value>
|
|
||||||
<description>The maximum number of running tasks for a job before
|
|
||||||
it gets preempted. No limits if undefined.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.map.maxattempts</name>
|
<name>mapreduce.map.maxattempts</name>
|
||||||
<value>4</value>
|
<value>4</value>
|
||||||
|
@ -395,7 +185,7 @@
|
||||||
<name>mapreduce.reduce.shuffle.connect.timeout</name>
|
<name>mapreduce.reduce.shuffle.connect.timeout</name>
|
||||||
<value>180000</value>
|
<value>180000</value>
|
||||||
<description>Expert: The maximum amount of time (in milliseconds) reduce
|
<description>Expert: The maximum amount of time (in milliseconds) reduce
|
||||||
task spends in trying to connect to a tasktracker for getting map output.
|
task spends in trying to connect to a remote node for getting map output.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
@ -432,22 +222,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.map.tasks.maximum</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description>The maximum number of map tasks that will be run
|
|
||||||
simultaneously by a task tracker.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.reduce.tasks.maximum</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description>The maximum number of reduce tasks that will be run
|
|
||||||
simultaneously by a task tracker.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.map.memory.mb</name>
|
<name>mapreduce.map.memory.mb</name>
|
||||||
<value>1024</value>
|
<value>1024</value>
|
||||||
|
@ -480,36 +254,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.retiredjobs.cache.size</name>
|
|
||||||
<value>1000</value>
|
|
||||||
<description>The number of retired job status to keep in the cache.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.outofband.heartbeat</name>
|
|
||||||
<value>false</value>
|
|
||||||
<description>Expert: Set this to true to let the tasktracker send an
|
|
||||||
out-of-band heartbeat on task-completion for better latency.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.jobhistory.lru.cache.size</name>
|
|
||||||
<value>5</value>
|
|
||||||
<description>The number of job history files loaded in memory. The jobs are
|
|
||||||
loaded when they are first accessed. The cache is cleared based on LRU.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.instrumentation</name>
|
|
||||||
<value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value>
|
|
||||||
<description>Expert: The instrumentation class to associate with each JobTracker.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapred.child.java.opts</name>
|
<name>mapred.child.java.opts</name>
|
||||||
<value>-Xmx200m</value>
|
<value>-Xmx200m</value>
|
||||||
|
@ -806,14 +550,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.jvm.numtasks</name>
|
|
||||||
<value>1</value>
|
|
||||||
<description>How many tasks to run per jvm. If set to -1, there is
|
|
||||||
no limit.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.ubertask.enable</name>
|
<name>mapreduce.job.ubertask.enable</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
|
@ -884,13 +620,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.maxtasks.perjob</name>
|
|
||||||
<value>-1</value>
|
|
||||||
<description>The maximum number of tasks for a single job.
|
|
||||||
A value of -1 indicates that there is no maximum. </description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.input.lineinputformat.linespermap</name>
|
<name>mapreduce.input.lineinputformat.linespermap</name>
|
||||||
<value>1</value>
|
<value>1</value>
|
||||||
|
@ -898,6 +627,7 @@
|
||||||
to include in each split.</description>
|
to include in each split.</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.client.submit.file.replication</name>
|
<name>mapreduce.client.submit.file.replication</name>
|
||||||
<value>10</value>
|
<value>10</value>
|
||||||
|
@ -906,41 +636,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.dns.interface</name>
|
|
||||||
<value>default</value>
|
|
||||||
<description>The name of the Network Interface from which a task
|
|
||||||
tracker should report its IP address.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.dns.nameserver</name>
|
|
||||||
<value>default</value>
|
|
||||||
<description>The host name or IP address of the name server (DNS)
|
|
||||||
which a TaskTracker should use to determine the host name used by
|
|
||||||
the JobTracker for communication and display purposes.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.http.threads</name>
|
|
||||||
<value>40</value>
|
|
||||||
<description>The number of worker threads for the http server. This is
|
|
||||||
used for map output fetching
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.http.address</name>
|
|
||||||
<value>0.0.0.0:50060</value>
|
|
||||||
<description>
|
|
||||||
The task tracker http server address and port.
|
|
||||||
If the port is 0 then the server will start on a free port.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.task.files.preserve.failedtasks</name>
|
<name>mapreduce.task.files.preserve.failedtasks</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
|
@ -1073,54 +768,10 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.job.userlog.retain.hours</name>
|
|
||||||
<value>24</value>
|
|
||||||
<description>The maximum time, in hours, for which the user-logs are to be
|
|
||||||
retained after the job completion.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.hosts.filename</name>
|
|
||||||
<value></value>
|
|
||||||
<description>Names a file that contains the list of nodes that may
|
|
||||||
connect to the jobtracker. If the value is empty, all hosts are
|
|
||||||
permitted.</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.hosts.exclude.filename</name>
|
|
||||||
<value></value>
|
|
||||||
<description>Names a file that contains the list of hosts that
|
|
||||||
should be excluded by the jobtracker. If the value is empty, no
|
|
||||||
hosts are excluded.</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.heartbeats.in.second</name>
|
|
||||||
<value>100</value>
|
|
||||||
<description>Expert: Approximate number of heart-beats that could arrive
|
|
||||||
at JobTracker in a second. Assuming each RPC can be processed
|
|
||||||
in 10msec, the default value is made 100 RPCs in a second.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.tasktracker.maxblacklists</name>
|
|
||||||
<value>4</value>
|
|
||||||
<description>The number of blacklists for a taskTracker by various jobs
|
|
||||||
after which the task tracker could be blacklisted across
|
|
||||||
all jobs. The tracker will be given tasks later
|
|
||||||
(after a day). The tracker will become a healthy
|
|
||||||
tracker after a restart.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.maxtaskfailures.per.tracker</name>
|
<name>mapreduce.job.maxtaskfailures.per.tracker</name>
|
||||||
<value>3</value>
|
<value>3</value>
|
||||||
<description>The number of task-failures on a tasktracker of a given job
|
<description>The number of task-failures on a node manager of a given job
|
||||||
after which new tasks of that job aren't assigned to it. It
|
after which new tasks of that job aren't assigned to it. It
|
||||||
MUST be less than mapreduce.map.maxattempts and
|
MUST be less than mapreduce.map.maxattempts and
|
||||||
mapreduce.reduce.maxattempts otherwise the failed task will
|
mapreduce.reduce.maxattempts otherwise the failed task will
|
||||||
|
@ -1142,8 +793,8 @@
|
||||||
<name>mapreduce.client.completion.pollinterval</name>
|
<name>mapreduce.client.completion.pollinterval</name>
|
||||||
<value>5000</value>
|
<value>5000</value>
|
||||||
<description>The interval (in milliseconds) between which the JobClient
|
<description>The interval (in milliseconds) between which the JobClient
|
||||||
polls the JobTracker for updates about job status. You may want to set this
|
polls the MapReduce ApplicationMaster for updates about job status. You may want to
|
||||||
to a lower value to make tests run faster on a single node system. Adjusting
|
set this to a lower value to make tests run faster on a single node system. Adjusting
|
||||||
this value in production may lead to unwanted client-server traffic.
|
this value in production may lead to unwanted client-server traffic.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
@ -1158,32 +809,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.persist.jobstatus.active</name>
|
|
||||||
<value>true</value>
|
|
||||||
<description>Indicates if persistency of job status information is
|
|
||||||
active or not.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.persist.jobstatus.hours</name>
|
|
||||||
<value>1</value>
|
|
||||||
<description>The number of hours job status information is persisted in DFS.
|
|
||||||
The job status information will be available after it drops off the memory
|
|
||||||
queue and between jobtracker restarts. With a zero value the job status
|
|
||||||
information is not persisted at all in DFS.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.persist.jobstatus.dir</name>
|
|
||||||
<value>/jobtracker/jobsInfo</value>
|
|
||||||
<description>The directory where the job status information is persisted
|
|
||||||
in a file system to be available after it drops off the memory queue and
|
|
||||||
between jobtracker restarts.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.task.profile</name>
|
<name>mapreduce.task.profile</name>
|
||||||
|
@ -1241,8 +866,8 @@
|
||||||
<description> The number of Task attempts AFTER which skip mode
|
<description> The number of Task attempts AFTER which skip mode
|
||||||
will be kicked off. When skip mode is kicked off, the
|
will be kicked off. When skip mode is kicked off, the
|
||||||
tasks reports the range of records which it will process
|
tasks reports the range of records which it will process
|
||||||
next, to the TaskTracker. So that on failures, TT knows which
|
next, to the MR ApplicationMaster. So that on failures, the MR AM
|
||||||
ones are possibly the bad records. On further executions,
|
knows which ones are possibly the bad records. On further executions,
|
||||||
those are skipped.
|
those are skipped.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
@ -1325,15 +950,6 @@
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<!-- Proxy Configuration -->
|
<!-- Proxy Configuration -->
|
||||||
<property>
|
|
||||||
<name>mapreduce.jobtracker.taskcache.levels</name>
|
|
||||||
<value>2</value>
|
|
||||||
<description> This is the max level of the task cache. For example, if
|
|
||||||
the level is 2, the tasks cached are at the host level and at the rack
|
|
||||||
level.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.queuename</name>
|
<name>mapreduce.job.queuename</name>
|
||||||
<value>default</value>
|
<value>default</value>
|
||||||
|
@ -1359,13 +975,13 @@
|
||||||
<description> Specifies whether ACLs should be checked
|
<description> Specifies whether ACLs should be checked
|
||||||
for authorization of users for doing various queue and job level operations.
|
for authorization of users for doing various queue and job level operations.
|
||||||
ACLs are disabled by default. If enabled, access control checks are made by
|
ACLs are disabled by default. If enabled, access control checks are made by
|
||||||
JobTracker and TaskTracker when requests are made by users for queue
|
MapReduce ApplicationMaster when requests are made by users for queue
|
||||||
operations like submit job to a queue and kill a job in the queue and job
|
operations like submit job to a queue and kill a job in the queue and job
|
||||||
operations like viewing the job-details (See mapreduce.job.acl-view-job)
|
operations like viewing the job-details (See mapreduce.job.acl-view-job)
|
||||||
or for modifying the job (See mapreduce.job.acl-modify-job) using
|
or for modifying the job (See mapreduce.job.acl-modify-job) using
|
||||||
Map/Reduce APIs, RPCs or via the console and web user interfaces.
|
Map/Reduce APIs, RPCs or via the console and web user interfaces.
|
||||||
For enabling this flag(mapreduce.cluster.acls.enabled), this is to be set
|
For enabling this flag, set to true in mapred-site.xml file of all
|
||||||
to true in mapred-site.xml on JobTracker node and on all TaskTracker nodes.
|
MapReduce clients (MR job submitting nodes).
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
@ -1418,8 +1034,8 @@
|
||||||
o job-level counters
|
o job-level counters
|
||||||
o task-level counters
|
o task-level counters
|
||||||
o tasks' diagnostic information
|
o tasks' diagnostic information
|
||||||
o task-logs displayed on the TaskTracker web-UI and
|
o task-logs displayed on the HistoryServer's web-UI and
|
||||||
o job.xml shown by the JobTracker's web-UI
|
o job.xml shown by the HistoryServer's web-UI
|
||||||
Every other piece of information of jobs is still accessible by any other
|
Every other piece of information of jobs is still accessible by any other
|
||||||
user, for e.g., JobStatus, JobProfile, list of jobs in the queue, etc.
|
user, for e.g., JobStatus, JobProfile, list of jobs in the queue, etc.
|
||||||
|
|
||||||
|
@ -1436,14 +1052,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.indexcache.mb</name>
|
|
||||||
<value>10</value>
|
|
||||||
<description> The maximum memory that a task tracker allows for the
|
|
||||||
index cache that is used when serving map outputs to reducers.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.job.token.tracking.ids.enabled</name>
|
<name>mapreduce.job.token.tracking.ids.enabled</name>
|
||||||
<value>false</value>
|
<value>false</value>
|
||||||
|
@ -1465,7 +1073,7 @@
|
||||||
<name>mapreduce.task.merge.progress.records</name>
|
<name>mapreduce.task.merge.progress.records</name>
|
||||||
<value>10000</value>
|
<value>10000</value>
|
||||||
<description> The number of records to process during merge before
|
<description> The number of records to process during merge before
|
||||||
sending a progress notification to the TaskTracker.
|
sending a progress notification to the MR ApplicationMaster.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
@ -1493,22 +1101,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.taskcontroller</name>
|
|
||||||
<value>org.apache.hadoop.mapred.DefaultTaskController</value>
|
|
||||||
<description>TaskController which is used to launch and manage task execution
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.group</name>
|
|
||||||
<value></value>
|
|
||||||
<description>Expert: Group to which TaskTracker belongs. If
|
|
||||||
LinuxTaskController is configured via mapreduce.tasktracker.taskcontroller,
|
|
||||||
the group owner of the task-controller binary should be same as this group.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>mapreduce.shuffle.port</name>
|
<name>mapreduce.shuffle.port</name>
|
||||||
<value>13562</value>
|
<value>13562</value>
|
||||||
|
@ -1528,42 +1120,6 @@
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<!-- Node health script variables -->
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.healthchecker.script.path</name>
|
|
||||||
<value></value>
|
|
||||||
<description>Absolute path to the script which is
|
|
||||||
periodically run by the node health monitoring service to determine if
|
|
||||||
the node is healthy or not. If the value of this key is empty or the
|
|
||||||
file does not exist in the location configured here, the node health
|
|
||||||
monitoring service is not started.</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.healthchecker.interval</name>
|
|
||||||
<value>60000</value>
|
|
||||||
<description>Frequency of the node health script to be run,
|
|
||||||
in milliseconds</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.healthchecker.script.timeout</name>
|
|
||||||
<value>600000</value>
|
|
||||||
<description>Time after node health script should be killed if
|
|
||||||
unresponsive and considered that the script has failed.</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<property>
|
|
||||||
<name>mapreduce.tasktracker.healthchecker.script.args</name>
|
|
||||||
<value></value>
|
|
||||||
<description>List of arguments which are to be passed to
|
|
||||||
node health script when it is being launched comma separated.
|
|
||||||
</description>
|
|
||||||
</property>
|
|
||||||
|
|
||||||
<!-- end of node health script variables -->
|
|
||||||
|
|
||||||
<!-- MR YARN Application properties -->
|
<!-- MR YARN Application properties -->
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
|
|
Loading…
Reference in New Issue