---
layout: doc_page
---

# Example Production Hadoop Configuration

The following configuration should work relatively well for Druid indexing and Hadoop. In the example, we are using Hadoop 2.4 with EC2 m1.xlarge nodes for NameNodes and cc2.8xlarge nodes for DataNodes.

### Core-site.xml

```xml
<configuration>

  <!-- Temporary directory on HDFS (but also sometimes local!) -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/mnt/persistent/hadoop</value>
  </property>
  
  <!-- S3 -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://#{IP}:9000</value>
  </property>
  <property>
    <name>fs.s3.impl</name>
    <value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value>
  </property>
  <property>
    <name>fs.s3.awsAccessKeyId</name>
    <value>#{S3_ACCESS_KEY}</value>
  </property>
  <property>
    <name>fs.s3.awsSecretAccessKey</name>
    <value>#{S3_SECRET_KEY}</value>
  </property>
  <property>
    <name>fs.s3.buffer.dir</name>
    <value>/mnt/persistent/hadoop-s3n</value>
  </property>
  <property>
    <name>fs.s3n.awsAccessKeyId</name>
    <value>#{S3N_ACCESS_KEY}</value>
  </property>
  <property>
    <name>fs.s3n.awsSecretAccessKey</name>
    <value>#{S3N_SECRET_KEY}</value>
  </property>

  <!-- Compression -->
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.Lz4Codec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>

  <!-- JBOD -->
  <property>
    <name>io.seqfile.local.dir</name>
    <value>/mnt/persistent/hadoop/io/local</value>
  </property>

</configuration>
```
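
With the `fs.s3n.*` credentials above in place, Druid's Hadoop indexing task (`index_hadoop`) can read raw data directly from S3 by listing `s3n://` paths in its `ioConfig`. The fragment below is a minimal sketch of that part of a task spec; the bucket and object path are hypothetical placeholders.

```json
"ioConfig": {
  "type": "hadoop",
  "inputSpec": {
    "type": "static",
    "paths": "s3n://example-bucket/druid/raw/events.gz"
  }
}
```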

### Mapred-site.xml

```xml
<configuration>

  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <property>
    <name>mapreduce.jobtracker.address</name>
    <value>#{JT_ADDR}:9001</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.http.address</name>
    <value>#{JT_HTTP_ADDR}:9100</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>#{JH_ADDR}:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>#{JH_WEBAPP_ADDR}:19888</value>
  </property>
  <property>
    <name>mapreduce.tasktracker.http.address</name>
    <value>#{TT_ADDR}:9103</value>
  </property>

  <!-- Memory and concurrency tuning -->
  <property>
    <name>mapreduce.job.reduces</name>
    <value>21</value>
  </property>
  <property>
    <name>mapreduce.job.jvm.numtasks</name>
    <value>20</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-server -Xmx1536m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>6144</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-server -Xmx2560m -Duser.timezone=UTC -Dfile.encoding=UTF-8 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.parallelcopies</name>
    <value>50</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.input.buffer.percent</name>
    <value>0.5</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.factor</name>
    <value>100</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.handler.count</name>
    <value>64</value>
  </property>
  <property>
    <name>mapreduce.tasktracker.http.threads</name>
    <value>20</value>
  </property>
  
  <!-- JBOD -->
  <property>
    <name>mapreduce.cluster.local.dir</name>
    <value>/mnt/persistent/hadoop/mapred/local</value>
  </property>

  <!-- Job history server persistent state -->
  <property>
    <name>mapreduce.jobhistory.recovery.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.recovery.store.class</name>
    <value>org.apache.hadoop.mapreduce.v2.hs.HistoryServerFileSystemStateStoreService</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.recovery.store.fs.uri</name>
    <value>file://${hadoop.tmp.dir}/mapred-jobhistory-state</value>
  </property>

  <!-- Compression -->
  <property>
    <!-- Off by default, because it breaks Druid indexing (at least, it does in druid-0.6.10+). Jobs should
         turn it on if they need it. -->
    <name>mapreduce.output.fileoutputformat.compress</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.output.fileoutputformat.compress.type</name>
    <value>BLOCK</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress.codec</name>
    <value>org.apache.hadoop.io.compress.Lz4Codec</value>
  </property>
  <property>
    <name>mapreduce.output.fileoutputformat.compress.codec</name>
    <value>org.apache.hadoop.io.compress.GzipCodec</value>
  </property>
  <!-- Speculative execution would violate various assumptions we've made in our system design -->
  <property>
    <name>mapreduce.map.speculative</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.reduce.speculative</name>
    <value>false</value>
  </property>

  <!-- Sometimes jobs take a long time to run, but really, they're okay. Examples: Long index persists,
       hadoop reading lots of empty files into a single mapper. Let's increase the timeout to 30 minutes. -->
  <property>
    <name>mapreduce.task.timeout</name>
    <value>1800000</value>
  </property>  

</configuration>
```
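
The map and reduce memory settings above are cluster-wide defaults. A single Druid Hadoop index task can override any of them without editing `mapred-site.xml` by putting the property in the `jobProperties` map of its `tuningConfig`, for example to give reducers more headroom for an unusually large datasource. A minimal sketch, with illustrative values only:

```json
"tuningConfig": {
  "type": "hadoop",
  "jobProperties": {
    "mapreduce.reduce.memory.mb": "8192",
    "mapreduce.reduce.java.opts": "-server -Xmx6g -Duser.timezone=UTC -Dfile.encoding=UTF-8"
  }
}
```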

### Yarn-site.xml

```xml
<configuration>

  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>#{RM_HOSTNAME}</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://#{IP_LOG_SERVER}:19888/jobhistory/logs/</value>
  </property>
  <property>
    <name>yarn.nodemanager.hostname</name>
    <value>#{IP_ADDR}</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>512</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  
  <!-- JBOD -->
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/mnt/persistent/hadoop/nm-local-dir</value>
  </property>

  <!-- ResourceManager persistent state doesn't work well in tests yet, so disable it -->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.fs.state-store.uri</name>
    <value>file://${hadoop.tmp.dir}/yarn-resourcemanager-state</value>
  </property>

  <!-- Ability to exclude hosts -->
  <property>
    <name>yarn.resourcemanager.nodes.exclude-path</name>
    <value>/mnt/persistent/hadoop/yarn-exclude.txt</value>
  </property>

</configuration>
```

### HDFS-site.xml

```xml
<configuration>

  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/mnt/persistent/hadoop/hdfs-exclude.txt</value>
  </property>

  <!-- JBOD -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///mnt/persistent/hadoop/dfs/data</value>
  </property>

</configuration>
```
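
The replication factor of 3 applies to everything written to this cluster. Since `dfs.replication` is a client-side setting, an indexing job that writes large intermediate files to HDFS (for example under its working path) could lower it for just those files through the same `jobProperties` mechanism; the value below is illustrative.

```json
"tuningConfig": {
  "type": "hadoop",
  "jobProperties": {
    "dfs.replication": "2"
  }
}
```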

### Capacity-scheduler.xml

```xml
<configuration>

  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.1</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.state</name>
    <value>RUNNING</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.node-locality-delay</name>
    <value>-1</value>
  </property>

</configuration>
```
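
This scheduler configuration defines only the `default` queue, so every job lands there. If additional queues are added later, a Druid indexing job can be directed at a specific one with the standard `mapreduce.job.queuename` property, again via `jobProperties`; the queue name below is hypothetical.

```json
"tuningConfig": {
  "type": "hadoop",
  "jobProperties": {
    "mapreduce.job.queuename": "druid-indexing"
  }
}
```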