Add more docs for production clusters

fjy 2015-01-21 14:10:13 -08:00
parent 1f94de22c6
commit 7540f2f562
11 changed files with 88 additions and 197 deletions

View File

@@ -20,7 +20,7 @@ Many of Druid's external dependencies can be plugged in as modules. Extensions c
|Property|Description|Default|
|--------|-----------|-------|
|`druid.extensions.remoteRepositories`|If this is not set to '[]', Druid will try to download extensions at the specified remote repository.|["http://repo1.maven.org/maven2/","https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"]|
|`druid.extensions.remoteRepositories`|If this is not set to '[]', Druid will try to download extensions from the specified remote repositories.|["http://repo1.maven.org/maven2/", "https://metamx.artifactoryonline.com/metamx/pub-libs-releases-local"]|
|`druid.extensions.localRepository`|The local maven directory where extensions are installed. If this is set, remoteRepositories is not required.|[]|
|`druid.extensions.coordinates`|The list of extensions to include.|[]|
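For example, a minimal sketch of enabling a single extension from the default remote repositories; the coordinate version and the properties file path are assumptions, so match them to your installation:

```bash
# Append an extension coordinate to the common properties file.
echo 'druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.7.0-rc1"]' \
  >> config/_common/common.runtime.properties
```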

View File

@@ -19,13 +19,13 @@ Clone Druid and build it:
git clone https://github.com/druid-io/druid.git druid
cd druid
git fetch --tags
git checkout druid-0.6.160
git checkout druid-0.7.0-rc1
./build.sh
```
### Downloading the DSK (Druid Standalone Kit)
[Download](http://static.druid.io/artifacts/releases/druid-services-0.6.160-bin.tar.gz) a stand-alone tarball and run it:
[Download](http://static.druid.io/artifacts/releases/druid-services-0.7.0-rc1-bin.tar.gz) a stand-alone tarball and run it:
``` bash
tar -xzf druid-services-0.X.X-bin.tar.gz

View File

@@ -8,9 +8,9 @@ The previous examples are for Kafka 7. To support Kafka 8, a couple of changes need to be made:
- Update realtime node's configs for Kafka 8 extensions
- e.g.
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-seven:0.6.160",...]`
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-seven",...]`
- becomes
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-eight:0.6.160",...]`
- `druid.extensions.coordinates=[...,"io.druid.extensions:druid-kafka-eight",...]`
- Update realtime task config for changed keys
- `firehose.type`, `plumber.rejectionPolicyFactory`, and all of the `firehose.consumerProps` settings change (the extension coordinate swap is sketched below)
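The extension swap itself is mechanical. A minimal sketch, assuming the realtime node's properties live at `config/realtime/runtime.properties`:

```bash
# Replace the Kafka 7 extension coordinate with the Kafka 8 one in place.
sed -i 's/druid-kafka-seven/druid-kafka-eight/g' config/realtime/runtime.properties
```

The task config keys have no one-to-one mapping, so update those by hand.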

View File

@@ -20,6 +20,51 @@ For high availability, there should be at least two of every node type.
To setup a local Druid cluster, see [Simple Cluster Configuration](Simple-Cluster-Configuration.html).
### Common Configuration (common.runtime.properties)
```
# Extensions
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions", "io.druid.extensions:druid-histogram", "io.druid.extensions:mysql-metadata-storage"]
# Zookeeper
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
# Request logging, monitoring, and metrics
druid.request.logging.type=emitter
druid.request.logging.feed=druid_requests
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# Metadata storage
druid.metadata.storage.type=mysql
druid.metadata.storage.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid?characterEncoding=UTF-8
druid.metadata.storage.connector.user=#{MYSQL_USER}
druid.metadata.storage.connector.password=#{MYSQL_PW}
# Deep storage
druid.storage.type=s3
druid.s3.accessKey=#{S3_ACCESS_KEY}
druid.s3.secretKey=#{S3_SECRET_KEY}
# Caching
druid.cache.type=memcached
druid.cache.hosts=#{MEMCACHED_IPS}
druid.cache.expiration=2147483647
druid.cache.memcachedPrefix=d1
druid.cache.maxOperationQueueSize=1073741824
druid.cache.readBufferSize=10485760
# Indexing service discovery
druid.selectors.indexing.serviceName=druid:prod:overlord
```
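Before starting any nodes, it is worth smoke-testing the shared dependencies. A sketch, assuming the `nc` and `mysql` clients are installed, with shell variables standing in for the `#{...}` placeholders above:

```bash
# ZooKeeper should answer "imok" to the four-letter-word health check.
echo ruok | nc "$ZK_IP" 2181
# The metadata store should accept connections with the configured credentials.
mysql -h "$MYSQL_URL" -u "$MYSQL_USER" -p -e 'SELECT 1'
```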
### Overlord Node
Run:
@@ -57,22 +102,6 @@ druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/overlord
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.160"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.metadata.storage.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.metadata.storage.connector.user=#{MYSQL_USER}
druid.metadata.storage.connector.password=#{MYSQL_PW}
druid.metadata.storage.connector.useValidationQuery=true
druid.metadata.storage.tables.base=prod
# Only required if you are autoscaling middle managers
druid.indexer.autoscale.doAutoscale=true
druid.indexer.autoscale.strategy=ec2
@@ -82,26 +111,15 @@ druid.indexer.autoscale.workerVersion=#{WORKER_VERSION}
# Upload all task logs to deep storage
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=#{LOGS_BUCKET}
druid.indexer.logs.s3Bucket=druid
druid.indexer.logs.s3Prefix=prod/logs/v1
# Run in remote mode
druid.indexer.runner.type=remote
druid.indexer.runner.compressZnodes=true
druid.indexer.runner.minWorkerVersion=#{WORKER_VERSION}
# Store all task state in MySQL
# Store all task state in the metadata storage
druid.indexer.storage.type=metadata
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
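Once the overlord is running, its console is a quick liveness check. A sketch, with `$OVERLORD_IP` standing in for the host above:

```bash
# The overlord serves a console on its HTTP port.
curl "http://$OVERLORD_IP:8080/console.html"
```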
### MiddleManager Node
@@ -139,52 +157,30 @@ druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/middlemanager
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.160","io.druid.extensions:druid-kafka-seven:0.6.160"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
# Store task logs in deep storage
druid.indexer.logs.type=s3
druid.indexer.logs.s3Bucket=#{LOGS_BUCKET}
druid.indexer.logs.s3Prefix=prod/logs/v1
# Dedicate more resources to peons
# Resources for peons
druid.indexer.runner.javaOpts=-server -Xmx3g -XX:+UseG1GC -XX:MaxGCPauseMillis=100 -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
druid.indexer.task.baseTaskDir=/mnt/persistent/task/
druid.indexer.task.chathandler.type=announce
druid.indexer.fork.property.druid.indexer.hadoopWorkingPath=/tmp/druid-indexing
druid.indexer.fork.property.druid.computation.buffer.size=536870912
druid.indexer.fork.property.druid.processing.numThreads=3
druid.indexer.fork.property.druid.request.logging.type=file
druid.indexer.fork.property.druid.request.logging.dir=request_logs/
# Peon properties
druid.indexer.fork.property.druid.monitoring.monitors=["com.metamx.metrics.JvmMonitor"]
druid.indexer.fork.property.druid.processing.buffer.sizeBytes=536870912
druid.indexer.fork.property.druid.processing.numThreads=2
druid.indexer.fork.property.druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 0}]
druid.indexer.fork.property.druid.server.http.numThreads=50
druid.indexer.fork.property.druid.storage.type=s3
druid.indexer.fork.property.druid.storage.archiveBaseKey=prod
druid.indexer.fork.property.druid.storage.archiveBucket=aws-prod-druid-archive
druid.indexer.fork.property.druid.storage.baseKey=prod/v1
druid.indexer.fork.property.druid.storage.bucket=#{LOGS_BUCKET}
druid.indexer.fork.property.druid.storage.bucket=druid
druid.indexer.fork.property.druid.storage.type=s3
druid.worker.capacity=10
druid.worker.capacity=9
druid.worker.ip=#{IP_ADDR}
druid.worker.version=#{WORKER_VERSION}
druid.selectors.indexing.serviceName=druid:prod:overlord
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
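As a rough sanity check on middle manager sizing (the usual guidance, not a hard rule): each peon needs its heap plus `druid.processing.buffer.sizeBytes` x (`druid.processing.numThreads` + 1) of direct memory. With the settings above:

```bash
# 9 workers x (3072 MB heap + (2 threads + 1) x 512 MB direct buffers), in MB
echo $(( 9 * (3072 + (2 + 1) * 512) ))   # 41472 MB, roughly 40 GB across all peons
```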
### Coordinator Node
@@ -223,29 +219,6 @@ Runtime.properties:
druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/coordinator
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.metadata.storage.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
druid.metadata.storage.connector.user=#{MYSQL_USER}
druid.metadata.storage.connector.password=#{MYSQL_PW}
druid.metadata.storage.connector.useValidationQuery=true
druid.metadata.storage.tables.base=prod
druid.selectors.indexing.serviceName=druid:prod:overlord
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor", "com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
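With the coordinator up, its HTTP API doubles as a health check. A sketch, with `$COORDINATOR_IP` standing in for the host above:

```bash
# Fetch the current load rules; an empty result is normal on a fresh cluster.
curl "http://$COORDINATOR_IP:8080/druid/coordinator/v1/rules"
```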
### Historical Node
@@ -286,34 +259,18 @@ druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/historical
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.160"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.s3.accessKey=#{ACCESS_KEY}
druid.s3.secretKey=#{SECRET_KEY}
druid.server.maxSize=300000000000
druid.server.http.numThreads=50
druid.historical.cache.useCache=true
druid.historical.cache.populateCache=true
druid.processing.buffer.sizeBytes=1073741824
druid.processing.numThreads=31
druid.server.http.numThreads=50
druid.server.maxSize=300000000000
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 300000000000}]
druid.request.logging.type=file
druid.request.logging.dir=request_logs/
druid.monitoring.monitors=["io.druid.server.metrics.ServerMonitor", "com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
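Direct memory for the historical node follows the same rule of thumb, `druid.processing.buffer.sizeBytes` x (`druid.processing.numThreads` + 1). Checking the settings above:

```bash
# 1 GB buffers x (31 threads + 1), expressed in GB
echo $(( 1073741824 * (31 + 1) / 1024 / 1024 / 1024 ))   # 32, so allow at least 32g of direct memory
```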
### Broker Node
@@ -327,15 +284,15 @@ io.druid.cli.Main server broker
Hardware:
```
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD)
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD - this hardware is a bit overkill for the broker, but we chose it for simplicity)
```
JVM Configuration:
```
-server
-Xmx50g
-Xms50g
-Xmx25g
-Xms25g
-XX:NewSize=6g
-XX:MaxNewSize=6g
-XX:MaxDirectMemorySize=64g
@@ -358,15 +315,6 @@ druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/broker
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.broker.cache.type=memcached
druid.broker.cache.hosts=#{MC_HOST1}:11211,#{MC_HOST2}:11211,#{MC_HOST3}:11211
druid.broker.cache.expiration=2147483647
druid.broker.cache.memcachedPrefix=d1
druid.broker.http.numConnections=20
druid.broker.http.readTimeout=PT5M
@@ -374,17 +322,4 @@ druid.processing.buffer.sizeBytes=2147483648
druid.processing.numThreads=31
druid.server.http.numThreads=50
druid.request.logging.type=emitter
druid.request.logging.feed=druid_requests
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
# Emit metrics over http
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
# If you choose to compress ZK announcements, you must do so for every node type
druid.announcer.type=batch
druid.curator.compress=true
```
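After the broker starts, a cheap end-to-end check is a time boundary query, the lightest query type. A sketch; the datasource name is a placeholder:

```bash
# timeBoundary only touches segment metadata, so it makes a good smoke test.
curl -X POST "http://$BROKER_IP:8080/druid/v2/?pretty" \
  -H 'Content-Type: application/json' \
  -d '{"queryType":"timeBoundary","dataSource":"your_datasource"}'
```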

View File

@@ -17,7 +17,7 @@ io.druid.cli.Main server router
Example Production Configuration
--------------------------------
In this example, we have two tiers in our production cluster: `hot` and `_default_tier`. Queries for the `hot` tier are routed through the `broker-hot` set of brokers, and queries for the `_default_tier` are routed through the `broker-cold` set of brokers. If any exceptions or network problems occur, queries are routed to the `broker-cold` set of brokers. In our example, we are running with a c3.2xlarge EC2 node.
In this example, we have two tiers in our production cluster: `hot` and `_default_tier`. Queries for the `hot` tier are routed through the `broker-hot` set of brokers, and queries for the `_default_tier` are routed through the `broker-cold` set of brokers. If any exceptions or network problems occur, queries are routed to the `broker-cold` set of brokers. In this example, we are running on a c3.2xlarge EC2 instance. We assume a `common.runtime.properties` already exists.
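This tier-to-broker mapping is expressed with the router's `druid.router.tierToBrokerMap` property. A sketch of the relevant line, assuming the service names used below:

```bash
# Route hot-tier queries to the hot brokers and everything else to the cold ones.
cat >> runtime.properties <<'EOF'
druid.router.tierToBrokerMap={"hot":"druid:prod:broker-hot","_default_tier":"druid:prod:broker-cold"}
EOF
```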
JVM settings:
@@ -49,15 +49,6 @@ druid.host=#{IP_ADDR}:8080
druid.port=8080
druid.service=druid/prod/router
druid.extensions.remoteRepositories=[]
druid.extensions.localRepository=lib
druid.extensions.coordinates=["io.druid.extensions:druid-histogram:0.6.160"]
druid.zk.service.host=#{ZK_IPs}
druid.zk.paths.base=/druid/prod
druid.discovery.curator.path=/prod/discovery
druid.processing.numThreads=1
druid.router.defaultBrokerServiceName=druid:prod:broker-cold
druid.router.coordinatorServiceName=druid:prod:coordinator
@@ -66,16 +57,6 @@ druid.router.http.numConnections=50
druid.router.http.readTimeout=PT5M
druid.server.http.numThreads=100
druid.request.logging.type=emitter
druid.request.logging.feed=druid_requests
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
druid.emitter=http
druid.emitter.http.recipientBaseUrl=#{URL}
druid.curator.compress=true
```
Runtime Configuration

View File

@@ -6,6 +6,17 @@ Simple Cluster Configuration
This simple Druid cluster configuration is suitable for initial experimentation with Druid on your local machine. For a more realistic production Druid cluster, see [Production Cluster Configuration](Production-Cluster-Configuration.html).
### Common Configuration (common.runtime.properties)
```
# Extensions
druid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven"]
# Zookeeper (defaults to localhost)
# Metadata Storage (defaults to derby with no username and password)
```
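Each node type below is launched the same way, with the common properties on the classpath. A sketch for the overlord; the directory layout is an assumption based on the standalone tarball:

```bash
# Start an overlord with both the common and node-specific config on the classpath.
java -Duser.timezone=UTC -Dfile.encoding=UTF-8 \
  -classpath "config/_common:config/overlord:lib/*" \
  io.druid.cli.Main server overlord
```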
### Overlord Node (Indexing Service)
Run:
@@ -22,22 +33,8 @@ Configuration:
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
-Ddruid.host=localhost
-Ddruid.port=8080
-Ddruid.service=overlord
-Ddruid.zk.service.host=localhost
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.160"]
-Ddruid.metadata.storage.connector.connectURI=jdbc:mysql://localhost:3306/druid
-Ddruid.metadata.storage.connector.user=druid
-Ddruid.metadata.storage.connector.password=diurd
-Ddruid.selectors.indexing.serviceName=overlord
-Ddruid.indexer.queue.startDelay=PT0M
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
-Ddruid.indexer.runner.startPort=8088
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
```
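With the overlord running, tasks are submitted over HTTP. A sketch; `my_task.json` is a placeholder for a real task spec:

```bash
# POST an indexing task; the response contains the task ID.
curl -X POST -H 'Content-Type: application/json' \
  -d @my_task.json "http://localhost:8080/druid/indexer/v1/task"
```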
@@ -60,16 +57,6 @@ Configuration:
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=coordinator
druid.port=8082
druid.zk.service.host=localhost
druid.metadata.storage.connector.connectURI=jdbc:mysql://localhost:3306/druid
druid.metadata.storage.connector.user=druid
druid.metadata.storage.connector.password=diurd
druid.coordinator.startDelay=PT70s
```
@@ -91,12 +78,6 @@ Configuration:
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=historical
druid.port=8083
druid.zk.service.host=localhost
druid.server.maxSize=10000000000
druid.processing.buffer.sizeBytes=100000000
@@ -123,12 +104,6 @@ Configuration:
-Duser.timezone=UTC
-Dfile.encoding=UTF-8
druid.host=localhost
druid.service=broker
druid.port=8084
druid.zk.service.host=localhost
druid.processing.buffer.sizeBytes=100000000
druid.processing.numThreads=1
```
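Once the broker is up, listing datasources is a quick end-to-end check (port as configured above):

```bash
# Returns the datasources the broker can see; empty until data is loaded.
curl "http://localhost:8084/druid/v2/datasources"
```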

View File

@@ -49,7 +49,7 @@ There are two ways to set up Druid: download a tarball, or [Build From Source](Bu
### Download a Tarball
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.6.160-bin.tar.gz). Download this file to a directory of your choosing.
We've built a tarball that contains everything you'll need. You'll find it [here](http://static.druid.io/artifacts/releases/druid-services-0.7.0-rc1-bin.tar.gz). Download this file to a directory of your choosing.
You can extract its contents by issuing:
@@ -60,7 +60,7 @@ tar -zxvf druid-services-*-bin.tar.gz
If you cd into the directory:
```
cd druid-services-0.6.160
cd druid-services-0.7.0-rc1
```
You should see a bunch of files:

View File

@@ -13,7 +13,7 @@ In this tutorial, we will set up other types of Druid nodes and external dependencies.
If you followed the first tutorial, you should already have Druid downloaded. If not, let's go back and do that first.
You can download the latest version of Druid [here](http://static.druid.io/artifacts/releases/druid-services-0.6.160-bin.tar.gz)
You can download the latest version of Druid [here](http://static.druid.io/artifacts/releases/druid-services-0.7.0-rc1-bin.tar.gz)
and untar the contents by issuing:

View File

@@ -61,7 +61,7 @@
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "/myPath/druid-services-0.6.160/examples/indexing/wikipedia_data.json"
"paths": "/myPath/druid-services-0.7.0-rc1/examples/indexing/wikipedia_data.json"
},
"metadataUpdateSpec": {
"type": "db",

View File

@@ -63,7 +63,7 @@
"type": "hadoop",
"inputSpec": {
"type": "static",
"paths": "/myPath/druid-services-0.6.160/examples/indexing/wikipedia_data.json"
"paths": "/myPath/druid-services-0.7.0-rc1/examples/indexing/wikipedia_data.json"
}
},
"tuningConfig": {

View File

@@ -63,7 +63,7 @@
"type": "index",
"firehose": {
"type": "local",
"baseDir": "/MyPath/druid-services-0.6.160/examples/indexing/",
"baseDir": "/MyPath/druid-services-0.7.0-rc1/examples/indexing/",
"filter": "wikipedia_data.json"
}
},