make all firehoses work with tasks, add a lot more documentation about configuration

This commit is contained in:
fjy 2014-05-28 16:33:59 -07:00
parent 49a8dc9e3e
commit 7be93a770a
21 changed files with 790 additions and 483 deletions

View File

@ -5,93 +5,6 @@ Broker Node Configuration
=========================
For general Broker Node information, see [here](Broker.html).
Quick Start
-----------
Run:
```
io.druid.cli.Main server broker
```
With the following JVM configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=broker
druid.port=8080
druid.zk.service.host=localhost
# Change these to make Druid faster
druid.processing.buffer.sizeBytes=100000000
druid.processing.numThreads=1
```
Production Configs
------------------
These production configs are using S3 as a deep store.
JVM settings:
```
-server
-Xmx#{HEAP_MAX}g
-Xms#{HEAP_MIN}g
-XX:NewSize=#{NEW_SIZE}g
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
-Dcom.sun.management.jmxremote.port=17071
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/broker
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.broker.cache.type=memcached
druid.broker.cache.hosts=#{MC_HOST1}:11211,#{MC_HOST2}:11211,#{MC_HOST3}:11211
druid.broker.cache.expiration=2147483647
druid.broker.cache.memcachedPrefix=d1
druid.broker.http.numConnections=20
druid.broker.http.readTimeout=PT5M
druid.server.http.numThreads=50
druid.request.logging.type=emitter
druid.request.logging.feed=druid_requests
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
Runtime Configuration
---------------------

View File

@ -5,96 +5,6 @@ Coordinator Node Configuration
==============================
For general Coordinator Node information, see [here](Coordinator.html).
Quick Start
-----------
Run:
```
io.druid.cli.Main server coordinator
```
With the following JVM configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=coordinator
druid.port=8082
druid.zk.service.host=localhost
druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
druid.db.connector.user=druid
druid.db.connector.password=diurd
druid.coordinator.startDelay=PT60s
```
Production Configs
------------------
These production configs are using S3 as a deep store.
JVM settings:
```
-server
-Xmx#{HEAP_MAX}g
-Xms#{HEAP_MIN}g
-XX:NewSize=#{NEW_SIZE}g
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
-Dcom.sun.management.jmxremote.port=17071
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/coordinator
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.db.connector.user=#{MYSQL_USER}
druid.db.connector.password=#{MYSQL_PW}
druid.db.connector.useValidationQuery=true
druid.db.tables.base=prod
druid.coordinator.period=PT60S
druid.coordinator.period.indexingPeriod=PT1H
druid.coordinator.startDelay=PT300S
druid.coordinator.merge.on=false
druid.coordinator.conversion.on=false
druid.selectors.indexing.serviceName=druid:prod:indexer
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor", "com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
Runtime Configuration
---------------------

View File

@ -5,97 +5,4 @@ Historical Node Configuration
=============================
For general Historical Node information, see [here](Historical.html).
Quick Start
-----------
Run:
```
io.druid.cli.Main server historical
```
With the following JVM configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=historical
druid.port=8081
druid.zk.service.host=localhost
druid.server.maxSize=10000000000
# Change these to make Druid faster
druid.processing.buffer.sizeBytes=100000000
druid.processing.numThreads=1
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 10000000000}]
```
Production Configs
------------------
These production configs are using S3 as a deep store.
JVM settings:
```
-server
-Xmx#{HEAP_MAX}g
-Xms#{HEAP_MIN}g
-XX:NewSize=#{NEW_SIZE}g
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
-Dcom.sun.management.jmxremote.port=17071
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/historical/_default
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:#{DRUID_VERSION}"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.server.type=historical
druid.server.maxSize=#{SERVER_MAXSIZE}
druid.server.http.numThreads=50
druid.processing.buffer.sizeBytes=#{BUFFER_SIZE}
druid.processing.numThreads=#{NUM_THREADS}
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": #{SERVER_MAXSIZE}}]
druid.request.logging.type=file
druid.request.logging.dir=request_logs/
druid.monitoring.monitors=["io.druid.server.metrics.ServerMonitor", "com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
The historical module uses several of the default modules in [Configuration](Configuration.html) and has no unique configs of its own.

View File

@ -3,167 +3,6 @@ layout: doc_page
---
For general Indexing Service information, see [here](Indexing-Service.html).
Quick Start
-----------
```
io.druid.cli.Main server overlord
```
With the following JVM configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=localhost
-Ddruid.port=8080
-Ddruid.service=overlord
-Ddruid.zk.service.host=localhost
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
-Ddruid.db.connector.user=druid
-Ddruid.db.connector.password=diurd
-Ddruid.selectors.indexing.serviceName=overlord
-Ddruid.indexer.queue.startDelay=PT0M
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
-Ddruid.indexer.runner.startPort=8081
-Ddruid.indexer.fork.property.druid.computation.buffer.size=268435456
```
Production Configs
------------------
These production configs are using S3 as a deep store and running the indexing service in distributed mode.
JVM settings for both overlord and middle manager:
```
-server
-Xmx#{HEAP_MAX}g
-Xms#{HEAP_MIN}g
-XX:NewSize=#{NEW_SIZE}g
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
-Dcom.sun.management.jmxremote.port=17071
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
```
Runtime.properties for overlord:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/indexer
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.db.connector.user=#{MYSQL_USER}
druid.db.connector.password=#{MYSQL_PW}
druid.db.connector.useValidationQuery=true
druid.db.tables.base=prod
druid.indexer.autoscale.doAutoscale=true
druid.indexer.autoscale.strategy=ec2
druid.indexer.autoscale.workerIdleTimeout=PT90m
druid.indexer.autoscale.terminatePeriod=PT5M
druid.indexer.autoscale.workerVersion=#{WORKER_VERSION}
druid.indexer.firehoseId.prefix=druid:prod:chat
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=#{INDEXER_LOGS_BUCKET}
druid.indexer.logs.s3Prefix=prod/logs/v1
druid.indexer.runner.type=remote
druid.indexer.runner.compressZnodes=true
druid.indexer.runner.minWorkerVersion=#{WORKER_VERSION}
druid.indexer.storage.type=db
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
Runtime.properties for middle manager:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/worker
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105","io.druid.extensions:druid-kafka-seven:0.6.105"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=#{INDEXER_LOGS_BUCKET}
druid.indexer.logs.s3Prefix=prod/logs/v1
druid.indexer.runner.javaOpts=-server -Xmx#{HEAP_MAX}g -Xms#{HEAP_MIN}g -XX:NewSize=#{NEW_SIZE}m -XX:MaxNewSize=#{MAX_NEW_SIZE}m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
druid.indexer.runner.startPort=8081
druid.indexer.runner.taskDir=/mnt/persistent/task/
druid.indexer.task.taskDir=/mnt/persistent/task/
druid.indexer.task.chathandler.type=announce
druid.indexer.firehoseId.prefix=druid:prod:chat
druid.indexer.fork.property.druid.indexer.hadoopWorkingPath=/tmp/druid-indexing
druid.indexer.fork.property.druid.computation.buffer.size=#{BUFFER_SIZE}
druid.indexer.fork.property.druid.processing.numThreads=#{NUM_WORKER_THREADS}
druid.indexer.fork.property.druid.request.logging.type=file
druid.indexer.fork.property.druid.request.logging.dir=request_logs/
druid.indexer.fork.property.druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 0}]
druid.indexer.fork.property.druid.storage.type=s3
druid.indexer.fork.property.druid.storage.baseKey=prod/v1
druid.indexer.fork.property.druid.storage.bucket=#{INDEXER_LOGS_BUCKET}
druid.server.http.numThreads=20
druid.worker.capacity=#{NUM_WORKER_THREADS}
druid.worker.ip=#{IP_ADDR}
druid.worker.version=#{WORKER_VERSION}
druid.selectors.indexing.serviceName=druid:prod:indexer
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
#### Runtime Configuration
In addition to the configuration of some of the default modules in [Configuration](Configuration.html), the overlord has the following basic configs:

View File

@ -0,0 +1,389 @@
---
layout: doc_page
---
Production Cluster Configuration
================================
This production Druid cluster assumes that MySQL and Zookeeper are already set up. The deep storage that is used for examples is S3 and memcached is used for a distributed cache.
The nodes that respond to queries (Historical, Broker, and Middle manager nodes) will use as many cores as are available, depending on usage, so it is best to keep these on dedicated machines. The upper limit of effectively utilized cores is not well characterized yet and would depend on types of queries, query load, and the schema. Historical daemons should have a heap size of at least 1GB per core for normal usage, but could be squeezed into a smaller heap for testing. Since in-memory caching is essential for good performance, even more RAM is better. Broker nodes will use RAM for caching, so they do more than just route queries. SSDs are highly recommended for Historical nodes when not all data is loaded in available memory.
The nodes that are responsible for coordination (Coordinator and Overlord nodes) require much less processing.
The effective utilization of cores by Zookeeper, MySQL, and Coordinator nodes is likely to be between 1 and 2 for each process/daemon, so these could potentially share a machine with lots of cores. These daemons work with a heap size between 500MB and 1GB.
We'll use r3.8xlarge nodes for query facing nodes and m1.xlarge nodes for coordination nodes. The following examples work relatively well in production; however, a more optimized tuning for the nodes we selected and more optimal hardware for a Druid cluster are both definitely possible.
For general purposes of high availability, there should be at least 2 of every node type.
To set up a local Druid cluster, see [Simple Cluster Configuration](Simple-Cluster-Configuration.html).
### Overlord Node
Run:
```
io.druid.cli.Main server overlord
```
Hardware:
```
m1.xlarge (Cores: 4, Memory: 15.0 GB)
```
JVM Configuration:
```
-server
-Xmx4g
-Xms4g
-XX:NewSize=256m
-XX:MaxNewSize=256m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/overlord
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.db.connector.user=#{MYSQL_USER}
druid.db.connector.password=#{MYSQL_PW}
druid.db.connector.useValidationQuery=true
druid.db.tables.base=prod
# Only required if you are autoscaling middle managers
druid.indexer.autoscale.doAutoscale=true
druid.indexer.autoscale.strategy=ec2
druid.indexer.autoscale.workerIdleTimeout=PT90m
druid.indexer.autoscale.terminatePeriod=PT5M
druid.indexer.autoscale.workerVersion=#{WORKER_VERSION}
# Upload all task logs to deep storage
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=#{LOGS_BUCKET}
druid.indexer.logs.s3Prefix=prod/logs/v1
# Run in remote mode
druid.indexer.runner.type=remote
druid.indexer.runner.compressZnodes=true
druid.indexer.runner.minWorkerVersion=#{WORKER_VERSION}
# Store all task state in MySQL
druid.indexer.storage.type=db
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
### MiddleManager Node
Run:
```
io.druid.cli.Main server middleManager
```
Hardware:
```
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD)
```
JVM Configuration:
```
-server
-Xmx64m
-Xms64m
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/middlemanager
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105","io.druid.extensions:druid-kafka-seven:0.6.105"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
# Store task logs in deep storage
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=#{LOGS_BUCKET}
druid.indexer.logs.s3Prefix=prod/logs/v1
# Dedicate more resources to peons
druid.indexer.runner.javaOpts=-server -Xmx6g -Xms6g -XX:NewSize=256m -XX:MaxNewSize=256m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
druid.indexer.runner.taskDir=/mnt/persistent/task/
druid.indexer.task.taskDir=/mnt/persistent/task/
druid.indexer.task.chathandler.type=announce
druid.indexer.fork.property.druid.indexer.hadoopWorkingPath=/tmp/druid-indexing
druid.indexer.fork.property.druid.computation.buffer.size=536870912
druid.indexer.fork.property.druid.processing.numThreads=3
druid.indexer.fork.property.druid.request.logging.type=file
druid.indexer.fork.property.druid.request.logging.dir=request_logs/
druid.indexer.fork.property.druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 0}]
druid.indexer.fork.property.druid.server.http.numThreads=50
druid.indexer.fork.property.druid.storage.type=s3
druid.indexer.fork.property.druid.storage.baseKey=prod/v1
druid.indexer.fork.property.druid.storage.bucket=#{LOGS_BUCKET}
druid.worker.capacity=10
druid.worker.ip=#{IP_ADDR}
druid.worker.version=#{WORKER_VERSION}
druid.selectors.indexing.serviceName=druid:prod:overlord
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
### Coordinator Node
Run:
```
io.druid.cli.Main server coordinator
```
Hardware:
```
m1.xlarge (Cores: 4, Memory: 15.0 GB)
```
JVM Configuration:
```
-server
-Xmx10g
-Xms10g
-XX:NewSize=512m
-XX:MaxNewSize=512m
-XX:+UseG1GC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/coordinator
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.db.connector.user=#{MYSQL_USER}
druid.db.connector.password=#{MYSQL_PW}
druid.db.connector.useValidationQuery=true
druid.db.tables.base=prod
druid.selectors.indexing.serviceName=druid:prod:overlord
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor", "com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
### Historical Node
Run:
```
io.druid.cli.Main server historical
```
Hardware:
```
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD)
```
JVM Configuration:
```
-server
-Xmx12g
-Xms12g
-XX:NewSize=6g
-XX:MaxNewSize=6g
-XX:MaxDirectMemorySize=32g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/historical
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.server.maxSize=300000000000
druid.server.http.numThreads=50
druid.processing.buffer.sizeBytes=1073741824
druid.processing.numThreads=31
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 300000000000}]
druid.request.logging.type=file
druid.request.logging.dir=request_logs/
druid.monitoring.monitors=["io.druid.server.metrics.ServerMonitor", "com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
### Broker Node
Run:
```
io.druid.cli.Main server broker
```
Hardware:
```
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD)
```
JVM Configuration:
```
-server
-Xmx50g
-Xms50g
-XX:NewSize=6g
-XX:MaxNewSize=6g
-XX:MaxDirectMemorySize=64g
-XX:+UseConcMarkSweepGC
-XX:+PrintGCDetails
-XX:+PrintGCTimeStamps
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Djava.io.tmpdir=/mnt/tmp
-Dcom.sun.management.jmxremote.port=17071
-Dcom.sun.management.jmxremote.authenticate=false
-Dcom.sun.management.jmxremote.ssl=false
```
Runtime.properties:
```
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/broker
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.broker.cache.type=memcached
druid.broker.cache.hosts=#{MC_HOST1}:11211,#{MC_HOST2}:11211,#{MC_HOST3}:11211
druid.broker.cache.expiration=2147483647
druid.broker.cache.memcachedPrefix=d1
druid.broker.http.numConnections=20
druid.broker.http.readTimeout=PT5M
druid.processing.buffer.sizeBytes=2147483648
druid.processing.numThreads=31
druid.server.http.numThreads=50
druid.request.logging.type=emitter
druid.request.logging.feed=druid_requests
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```

View File

@ -0,0 +1,135 @@
---
layout: doc_page
---
Simple Cluster Configuration
===============================
This simple Druid cluster configuration can be used for initially experimenting with Druid on your local machine. For a more realistic production Druid cluster, see [Production Cluster Configuration](Production-Cluster-Configuration.html).
### Overlord Node (Indexing Service)
Run:
```
io.druid.cli.Main server overlord
```
Configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=localhost
-Ddruid.port=8080
-Ddruid.service=overlord
-Ddruid.zk.service.host=localhost
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.105"]
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
-Ddruid.db.connector.user=druid
-Ddruid.db.connector.password=diurd
-Ddruid.selectors.indexing.serviceName=overlord
-Ddruid.indexer.queue.startDelay=PT0M
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
```
This runs the indexing service in local mode, and can support real-time ingestion tasks (with one processing thread for queries).
### Coordinator Node
Run:
```
io.druid.cli.Main server coordinator
```
Configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=coordinator
druid.port=8082
druid.zk.service.host=localhost
druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
druid.db.connector.user=druid
druid.db.connector.password=diurd
druid.coordinator.startDelay=PT70s
```
This simple coordinator assumes local deep storage.
### Historical Node
Run:
```
io.druid.cli.Main server historical
```
Configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=historical
druid.port=8083
druid.zk.service.host=localhost
druid.server.maxSize=10000000000
druid.processing.buffer.sizeBytes=100000000
druid.processing.numThreads=1
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize"\: 10000000000}]
```
This historical node will be able to load 10 GB of data and be able to process 1 segment at a time. Deep storage is assumed to be local storage here.
### Broker Node
Run:
```
io.druid.cli.Main server broker
```
Configuration:
```
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=broker
druid.port=8084
druid.zk.service.host=localhost
druid.processing.buffer.sizeBytes=100000000
druid.processing.numThreads=1
```
This simple broker will run groupBys in a single thread.

View File

@ -85,21 +85,28 @@ config/overlord/runtime.properties
The configurations for the overlord node are as follows:
```bash
druid.host=localhost
druid.port=8087
druid.service=overlord
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.zk.service.host=localhost
-Ddruid.host=localhost
-Ddruid.port=8080
-Ddruid.service=overlord
druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
druid.db.connector.user=druid
druid.db.connector.password=diurd
-Ddruid.zk.service.host=localhost
druid.selectors.indexing.serviceName=overlord
druid.indexer.queue.startDelay=PT0M
druid.indexer.runner.javaOpts="-server -Xmx1g"
druid.indexer.runner.startPort=8088
druid.indexer.fork.property.druid.computation.buffer.size=268435456
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.105"]
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
-Ddruid.db.connector.user=druid
-Ddruid.db.connector.password=diurd
-Ddruid.selectors.indexing.serviceName=overlord
-Ddruid.indexer.queue.startDelay=PT0M
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
```
If you are interested in reading more about these configurations, see [here](Indexing-Service.html).

View File

@ -14,13 +14,17 @@ h2. Getting Started
* "Tutorial: Loading Your Data Part 2":./Tutorial:-Loading-Your-Data-Part-2.html
* "Tutorial: All About Queries":./Tutorial:-All-About-Queries.html
h2. Booting a Druid Cluster
* "Simple Cluster Configuration":Simple-Cluster-Configuration.html
* "Production Cluster Configuration":Production-Cluster-Configuration.html
h2. Configuration
* "Common Configuration":Configuration.html
* "Realtime":Realtime-Config.html
* "Indexing Service":Indexing-Service-Config.html
* "Coordinator":Coordinator-Config.html
* "Historical":Historical-Config.html
* "Broker":Broker-Config.html
* "Indexing Service":Indexing-Service-Config.html
* "Realtime":Realtime-Config.html
h2. Data Ingestion
* "Realtime":./Realtime-ingestion.html
@ -32,7 +36,6 @@ h2. Data Ingestion
h2. Operations
* "Extending Druid":./Modules.html
* "Cluster Setup":./Cluster-setup.html
* "Booting a Production Cluster":./Booting-a-production-cluster.html
* "Performance FAQ":./Performance-FAQ.html

View File

@ -0,0 +1,73 @@
{
"type": "index_realtime",
"schema": {
"dataSource": "wikipedia",
"aggregators": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
}
],
"indexGranularity": "none"
},
"fireDepartmentConfig": {
"maxRowsInMemory": 500000,
"intermediatePersistPeriod": "PT10m"
},
"firehose": {
"type": "kafka-0.7.2",
"consumerProps": {
"zk.connect": "localhost:2181",
"zk.connectiontimeout.ms": "15000",
"zk.sessiontimeout.ms": "15000",
"zk.synctime.ms": "5000",
"groupid": "druid-example",
"fetch.size": "1048586",
"autooffset.reset": "largest",
"autocommit.enable": "false"
},
"feed": "wikipedia",
"parser": {
"timestampSpec": {
"column": "timestamp"
},
"data": {
"format": "json",
"dimensions": [
"page",
"language",
"user",
"unpatrolled",
"newPage",
"robot",
"anonymous",
"namespace",
"continent",
"country",
"region",
"city"
]
}
}
},
"windowPeriod": "PT10m",
"segmentGranularity": "hour",
"rejectionPolicy": {
"type": "test"
}
}

View File

@ -0,0 +1,71 @@
{
"type": "index_realtime",
"schema": {
"dataSource": "wikipedia",
"aggregators": [
{
"type": "count",
"name": "count"
},
{
"type": "doubleSum",
"name": "added",
"fieldName": "added"
},
{
"type": "doubleSum",
"name": "deleted",
"fieldName": "deleted"
},
{
"type": "doubleSum",
"name": "delta",
"fieldName": "delta"
}
],
"indexGranularity": "none"
},
"fireDepartmentConfig": {
"maxRowsInMemory": 500000,
"intermediatePersistPeriod": "PT10m"
},
"firehose": {
"type": "irc",
"nick": "wiki1234567890",
"host": "irc.wikimedia.org",
"channels": [
"#en.wikipedia",
"#fr.wikipedia",
"#de.wikipedia",
"#ja.wikipedia"
],
"decoder": {
"type": "wikipedia",
"namespaces": {
"#en.wikipedia": {
"_empty_": "main",
"Category": "category",
"$1 talk": "project talk",
"Template talk": "template talk",
"Help talk": "help talk",
"Media": "media",
"MediaWiki talk": "mediawiki talk",
"File talk": "file talk",
"MediaWiki": "mediawiki",
"User": "user",
"File": "file",
"User talk": "user talk",
"Template": "template",
"Help": "help",
"Special": "special",
"Talk": "talk",
"Category talk": "category talk"
}
}
},
"timeDimension": "timestamp",
"timeFormat": "iso"
},
"windowPeriod": "PT10m",
"segmentGranularity": "hour"
}

View File

@ -8,4 +8,4 @@ druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
druid.db.connector.user=druid
druid.db.connector.password=diurd
druid.coordinator.startDelay=PT60s
druid.coordinator.startDelay=PT70s

View File

@ -1,17 +1,22 @@
druid.host=localhost
druid.port=8087
druid.service=overlord
-server
-Xmx256m
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.zk.service.host=localhost
-Ddruid.host=localhost
-Ddruid.port=8080
-Ddruid.service=overlord
druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
druid.db.connector.user=druid
druid.db.connector.password=diurd
-Ddruid.zk.service.host=localhost
druid.selectors.indexing.serviceName=overlord
druid.indexer.queue.startDelay=PT0M
druid.indexer.runner.javaOpts="-server -Xmx1g"
druid.indexer.runner.startPort=8088
druid.indexer.fork.property.druid.computation.buffer.size=268435456
druid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.105"]
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
-Ddruid.db.connector.user=druid
-Ddruid.db.connector.password=diurd
-Ddruid.selectors.indexing.serviceName=overlord
-Ddruid.indexer.queue.startDelay=PT0M
-Ddruid.indexer.runner.javaOpts="-server -Xmx256m"
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000

View File

@ -33,7 +33,6 @@ import com.metamx.common.IAE;
import com.metamx.common.ISE;
import com.metamx.common.logger.Logger;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.SpatialDimensionSchema;
import io.druid.data.input.impl.StringInputRowParser;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.IndexIO;
@ -61,7 +60,6 @@ import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
@ -612,16 +610,10 @@ public class IndexGeneratorJob implements Jobby
private IncrementalIndex makeIncrementalIndex(Bucket theBucket, AggregatorFactory[] aggs)
{
List<SpatialDimensionSchema> spatialDimensionSchemas = config.getSchema().getDataSchema().getParser() == null
? Lists.<SpatialDimensionSchema>newArrayList()
: config.getSchema().getDataSchema().getParser()
.getParseSpec()
.getDimensionsSpec()
.getSpatialDimensions();
return new IncrementalIndex(
new IncrementalIndexSchema.Builder()
.withMinTimestamp(theBucket.time.getMillis())
.withSpatialDimensions(spatialDimensionSchemas)
.withSpatialDimensions(config.getSchema().getDataSchema().getParser())
.withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
.withMetrics(aggs)
.build()

View File

@ -53,7 +53,7 @@ public class ForkingTaskRunnerConfig
@JsonProperty
@Min(1024)
@Max(65535)
private int startPort = 8080;
private int startPort = 8081;
@JsonProperty
@NotNull

View File

@ -22,6 +22,7 @@ package io.druid.segment.incremental;
import com.google.common.base.Function;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import io.druid.data.input.impl.InputRowParser;
import io.druid.data.input.impl.SpatialDimensionSchema;
import io.druid.granularity.QueryGranularity;
import io.druid.query.aggregation.AggregatorFactory;
@ -126,6 +127,20 @@ public class IncrementalIndexSchema
return this;
}
/**
 * Takes the spatial dimensions from the given parser's parse spec.
 *
 * The chain parser -> parse spec -> dimensions spec -> spatial dimensions is
 * walked defensively: if the parser or any link in the chain is null, the
 * spatial dimensions are set to a new empty list instead.
 *
 * @param parser the parser to read spatial dimensions from; may be null
 *
 * @return this builder
 */
public Builder withSpatialDimensions(InputRowParser parser)
{
  // Bail out with an empty list as soon as any link in the chain is missing.
  if (parser == null
      || parser.getParseSpec() == null
      || parser.getParseSpec().getDimensionsSpec() == null
      || parser.getParseSpec().getDimensionsSpec().getSpatialDimensions() == null) {
    this.spatialDimensions = Lists.newArrayList();
    return this;
  }

  this.spatialDimensions = parser.getParseSpec().getDimensionsSpec().getSpatialDimensions();
  return this;
}
public Builder withSpatialDimensions(List<SpatialDimensionSchema> spatialDimensions)
{
this.spatialDimensions = spatialDimensions;

View File

@ -86,8 +86,8 @@ public class DruidProcessingModule implements Module
if (maxDirectMemory < memoryNeeded) {
throw new ProvisionException(
String.format(
"Not enough direct memory. Please adjust -XX:MaxDirectMemorySize, druid.computation.buffer.size, or druid.processing.numThreads: "
+ "maxDirectMemory[%,d], memoryNeeded[%,d] = druid.computation.buffer.size[%,d] * ( druid.processing.numThreads[%,d] + 1 )",
"Not enough direct memory. Please adjust -XX:MaxDirectMemorySize, druid.processing.buffer.sizeBytes, or druid.processing.numThreads: "
+ "maxDirectMemory[%,d], memoryNeeded[%,d] = druid.processing.buffer.sizeBytes[%,d] * ( druid.processing.numThreads[%,d] + 1 )",
maxDirectMemory,
memoryNeeded,
config.intermediateComputeSizeBytes(),

View File

@ -25,6 +25,7 @@ import com.fasterxml.jackson.databind.module.SimpleModule;
import com.google.inject.Binder;
import io.druid.data.input.ProtoBufInputRowParser;
import io.druid.initialization.DruidModule;
import io.druid.segment.realtime.firehose.IrcParser;
import java.util.Arrays;
import java.util.List;
@ -44,7 +45,8 @@ public class ParsersModule implements DruidModule
return Arrays.<Module>asList(
new SimpleModule("ParsersModule")
.registerSubtypes(
new NamedType(ProtoBufInputRowParser.class, "protobuf")
new NamedType(ProtoBufInputRowParser.class, "protobuf"),
new NamedType(IrcParser.class, "irc")
)
);
}

View File

@ -106,6 +106,7 @@ public class IrcFirehoseFactory implements FirehoseFactory<IrcParser>
private final String nick;
private final String host;
private final List<String> channels;
private final IrcDecoder decoder;
private final IrcParser parser;
@JsonCreator
@ -119,9 +120,34 @@ public class IrcFirehoseFactory implements FirehoseFactory<IrcParser>
this.nick = nick;
this.host = host;
this.channels = channels;
this.decoder = decoder;
this.parser = new IrcParser(decoder);
}
/**
 * @return the {@code nick} value this factory was constructed with
 */
@JsonProperty
public String getNick()
{
  return this.nick;
}
/**
 * @return the {@code host} value this factory was constructed with
 */
@JsonProperty
public String getHost()
{
  return this.host;
}
/**
 * @return the {@code channels} list this factory was constructed with
 *         (the same list instance, not a copy)
 */
@JsonProperty
public List<String> getChannels()
{
  return this.channels;
}
/**
 * @return the decoder this factory was constructed with; the same decoder
 *         is wrapped by this factory's {@code IrcParser}
 */
@JsonProperty
public IrcDecoder getDecoder()
{
  return this.decoder;
}
@Override
public Firehose connect(final IrcParser firehoseParser) throws IOException
{

View File

@ -19,25 +19,38 @@
package io.druid.segment.realtime.firehose;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeName;
import com.ircclouds.irc.api.domain.messages.ChannelPrivMsg;
import com.metamx.common.Pair;
import com.metamx.common.exception.FormattedException;
import io.druid.data.input.InputRow;
import io.druid.data.input.impl.DimensionsSpec;
import io.druid.data.input.impl.InputRowParser;
import io.druid.data.input.impl.ParseSpec;
import io.druid.data.input.impl.TimestampSpec;
import org.joda.time.DateTime;
/**
 * An {@link InputRowParser} over (timestamp, IRC channel message) pairs that
 * is configured with an {@link IrcDecoder}.
 *
 * The JSON type name is "irc", matching the name under which this class is
 * registered as a subtype in ParsersModule. It was previously annotated with
 * "protoBuf", a copy-paste leftover that did not match the registered name.
 */
@JsonTypeName("irc")
public class IrcParser implements InputRowParser<Pair<DateTime, ChannelPrivMsg>>
{
private final IrcDecoder decoder;
public IrcParser(IrcDecoder decoder)
@JsonCreator
public IrcParser(@JsonProperty("decoder") IrcDecoder decoder)
{
this.decoder = decoder;
}
/**
 * @return the decoder supplied to this parser's constructor
 */
@JsonProperty
public IrcDecoder getDecoder()
{
  return this.decoder;
}
@Override
public InputRow parse(Pair<DateTime, ChannelPrivMsg> msg) throws FormattedException
{
@ -47,7 +60,7 @@ public class IrcParser implements InputRowParser<Pair<DateTime, ChannelPrivMsg>>
@Override
public ParseSpec getParseSpec()
{
throw new UnsupportedOperationException();
return null;
}
@Override

View File

@ -73,6 +73,7 @@ class WikipediaIrcDecoder implements IrcDecoder
);
final Map<String, Map<String, String>> namespaces;
final String geoIpDatabase;
public WikipediaIrcDecoder( Map<String, Map<String, String>> namespaces) {
this(namespaces, null);
@ -86,7 +87,7 @@ class WikipediaIrcDecoder implements IrcDecoder
namespaces = Maps.newHashMap();
}
this.namespaces = namespaces;
this.geoIpDatabase = geoIpDatabase;
File geoDb;
if(geoIpDatabase != null) {
@ -116,6 +117,18 @@ class WikipediaIrcDecoder implements IrcDecoder
}
}
/**
 * @return the namespaces map held by this decoder (the same map instance,
 *         not a copy)
 */
@JsonProperty
public Map<String, Map<String, String>> getNamespaces()
{
  return this.namespaces;
}
/**
 * @return the {@code geoIpDatabase} value passed to the constructor; null
 *         when the single-argument constructor was used
 */
@JsonProperty
public String getGeoIpDatabase()
{
  return this.geoIpDatabase;
}
@Override
public InputRow decodeMessage(final DateTime timestamp, String channel, String msg)
{

View File

@ -177,17 +177,11 @@ public class Sink implements Iterable<FireHydrant>
private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema)
{
List<SpatialDimensionSchema> spatialDimensionSchemas = schema.getParser() == null
? Lists.<SpatialDimensionSchema>newArrayList()
: schema.getParser()
.getParseSpec()
.getDimensionsSpec()
.getSpatialDimensions();
IncrementalIndex newIndex = new IncrementalIndex(
new IncrementalIndexSchema.Builder()
.withMinTimestamp(minTimestamp)
.withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
.withSpatialDimensions(spatialDimensionSchemas)
.withSpatialDimensions(schema.getParser())
.withMetrics(schema.getAggregators())
.build()
);