---
layout: doc_page
---
Example Production Hadoop Configuration
=======================================
The following configuration should work relatively well for Druid indexing and Hadoop. In the example, we are using Hadoop 2.4 with EC2 m1.xlarge nodes for NameNodes and cc2.8xlarge nodes for DataNodes.
### core-site.xml
```
hadoop.tmp.dir
/mnt/persistent/hadoop
fs.defaultFS
hdfs://#{IP}:9000
fs.s3.impl
org.apache.hadoop.fs.s3native.NativeS3FileSystem
fs.s3.awsAccessKeyId
#{S3_ACCESS_KEY}
fs.s3.awsSecretAccessKey
#{S3_SECRET_KEY}
fs.s3.buffer.dir
/mnt/persistent/hadoop-s3n
fs.s3n.awsAccessKeyId
#{S3N_ACCESS_KEY}
fs.s3n.awsSecretAccessKey
#{S3N_SECRET_KEY}
io.compression.codecs
org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.Lz4Codec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec
io.seqfile.local.dir
/mnt/persistent/hadoop/io/local
```
### mapred-site.xml
```
mapreduce.framework.name
yarn
mapreduce.jobtracker.address
#{JT_ADDR}:9001
mapreduce.jobtracker.http.address
#{JT_HTTP_ADDR}:9100
mapreduce.jobhistory.address
#{JH_ADDR}:10020
mapreduce.jobhistory.webapp.address
#{JH_WEBAPP_ADDR}:19888
mapreduce.tasktracker.http.address
#{TT_ADDR}:9103
mapreduce.job.reduces
21
mapreduce.job.jvm.numtasks
20
mapreduce.map.memory.mb
2048
mapreduce.map.java.opts
-server -Xmx1536m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
mapreduce.reduce.memory.mb
6144
mapreduce.reduce.java.opts
-server -Xmx2560m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
mapreduce.reduce.shuffle.parallelcopies
50
mapreduce.reduce.shuffle.input.buffer.percent
0.5
mapreduce.task.io.sort.mb
256
mapreduce.task.io.sort.factor
100
mapreduce.jobtracker.handler.count
64
mapreduce.tasktracker.http.threads
20
mapreduce.cluster.local.dir
/mnt/persistent/hadoop/mapred/local
mapreduce.jobhistory.recovery.enable
true
mapreduce.jobhistory.recovery.store.class
org.apache.hadoop.mapreduce.v2.hs.HistoryServerFileSystemStateStoreService
mapreduce.jobhistory.recovery.store.fs.uri
file://${hadoop.tmp.dir}/mapred-jobhistory-state
mapreduce.output.fileoutputformat.compress
false
mapreduce.map.output.compress
true
mapreduce.output.fileoutputformat.compress.type
BLOCK
mapreduce.map.output.compress.codec
org.apache.hadoop.io.compress.Lz4Codec
mapreduce.output.fileoutputformat.compress.codec
org.apache.hadoop.io.compress.GzipCodec
mapreduce.map.speculative
false
mapreduce.reduce.speculative
false
mapreduce.task.timeout
1800000
```
### yarn-site.xml
```
yarn.resourcemanager.hostname
#{RM_HOSTNAME}
yarn.resourcemanager.scheduler.class
org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler
yarn.nodemanager.aux-services
mapreduce_shuffle
yarn.log-aggregation-enable
true
yarn.log.server.url
http://#{IP_LOG_SERVER}:19888/jobhistory/logs/
yarn.nodemanager.hostname
#{IP_ADDR}
yarn.scheduler.minimum-allocation-mb
512
yarn.nodemanager.resource.memory-mb
1024
yarn.nodemanager.resource.cpu-vcores
1
yarn.nodemanager.vmem-check-enabled
false
yarn.nodemanager.local-dirs
/mnt/persistent/hadoop/nm-local-dir
yarn.resourcemanager.recovery.enabled
false
yarn.resourcemanager.store.class
org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
yarn.resourcemanager.fs.state-store.uri
file://${hadoop.tmp.dir}/yarn-resourcemanager-state
yarn.resourcemanager.nodes.exclude-path
/mnt/persistent/hadoop/yarn-exclude.txt
```
### hdfs-site.xml
```
dfs.replication
3
dfs.namenode.datanode.registration.ip-hostname-check
false
dfs.hosts.exclude
/mnt/persistent/hadoop/hdfs-exclude.txt
dfs.datanode.data.dir
file:///mnt/persistent/hadoop/dfs/data
```
### capacity-scheduler.xml
```
yarn.scheduler.capacity.maximum-am-resource-percent
0.1
yarn.scheduler.capacity.root.queues
default
yarn.scheduler.capacity.root.default.capacity
100
yarn.scheduler.capacity.root.default.user-limit-factor
1
yarn.scheduler.capacity.root.default.maximum-capacity
100
yarn.scheduler.capacity.root.default.state
RUNNING
yarn.scheduler.capacity.root.default.acl_submit_applications
*
yarn.scheduler.capacity.root.default.acl_administer_queue
*
yarn.scheduler.capacity.node-locality-delay
-1
```