mirror of https://github.com/apache/druid.git
make all firehoses work with tasks, add a lot more documentation about configuration
This commit is contained in:
parent
49a8dc9e3e
commit
7be93a770a
|
@ -5,93 +5,6 @@ Broker Node Configuration
|
|||
=========================
|
||||
For general Broker Node information, see [here](Broker.html).
|
||||
|
||||
Quick Start
|
||||
-----------
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server broker
|
||||
```
|
||||
|
||||
With the following JVM configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.host=localhost
|
||||
druid.service=broker
|
||||
druid.port=8080
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
# Change these to make Druid faster
|
||||
druid.processing.buffer.sizeBytes=100000000
|
||||
druid.processing.numThreads=1
|
||||
|
||||
```
|
||||
|
||||
Production Configs
|
||||
------------------
|
||||
These production configs are using S3 as a deep store.
|
||||
|
||||
JVM settings:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx#{HEAP_MAX}g
|
||||
-Xms#{HEAP_MIN}g
|
||||
-XX:NewSize=#{NEW_SIZE}g
|
||||
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
|
||||
-Dcom.sun.management.jmxremote.port=17071
|
||||
-Dcom.sun.management.jmxremote.authenticate=false
|
||||
-Dcom.sun.management.jmxremote.ssl=false
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/broker
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.broker.cache.type=memcached
|
||||
druid.broker.cache.hosts=#{MC_HOST1}:11211,#{MC_HOST2}:11211,#{MC_HOST3}:11211
|
||||
druid.broker.cache.expiration=2147483647
|
||||
druid.broker.cache.memcachedPrefix=d1
|
||||
druid.broker.http.numConnections=20
|
||||
druid.broker.http.readTimeout=PT5M
|
||||
|
||||
druid.server.http.numThreads=50
|
||||
|
||||
druid.request.logging.type=emitter
|
||||
druid.request.logging.feed=druid_requests
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
Runtime Configuration
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -5,96 +5,6 @@ Coordinator Node Configuration
|
|||
==============================
|
||||
For general Coordinator Node information, see [here](Coordinator.html).
|
||||
|
||||
Quick Start
|
||||
-----------
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server coordinator
|
||||
```
|
||||
|
||||
With the following JVM configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.host=localhost
|
||||
druid.service=coordinator
|
||||
druid.port=8082
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
|
||||
druid.db.connector.user=druid
|
||||
druid.db.connector.password=diurd
|
||||
|
||||
druid.coordinator.startDelay=PT60s
|
||||
```
|
||||
|
||||
Production Configs
|
||||
------------------
|
||||
These production configs are using S3 as a deep store.
|
||||
|
||||
JVM settings:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx#{HEAP_MAX}g
|
||||
-Xms#{HEAP_MIN}g
|
||||
-XX:NewSize=#{NEW_SIZE}g
|
||||
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
|
||||
-Dcom.sun.management.jmxremote.port=17071
|
||||
-Dcom.sun.management.jmxremote.authenticate=false
|
||||
-Dcom.sun.management.jmxremote.ssl=false
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/coordinator
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
|
||||
druid.db.connector.user=#{MYSQL_USER}
|
||||
druid.db.connector.password=#{MYSQL_PW}
|
||||
druid.db.connector.useValidationQuery=true
|
||||
druid.db.tables.base=prod
|
||||
|
||||
druid.coordinator.period=PT60S
|
||||
druid.coordinator.period.indexingPeriod=PT1H
|
||||
druid.coordinator.startDelay=PT300S
|
||||
druid.coordinator.merge.on=false
|
||||
druid.coordinator.conversion.on=false
|
||||
|
||||
druid.selectors.indexing.serviceName=druid:prod:indexer
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor", "com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
Runtime Configuration
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -5,97 +5,4 @@ Historical Node Configuration
|
|||
=============================
|
||||
For general Historical Node information, see [here](Historical.html).
|
||||
|
||||
Quick Start
|
||||
-----------
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server historical
|
||||
```
|
||||
|
||||
With the following JVM configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.host=localhost
|
||||
druid.service=historical
|
||||
druid.port=8081
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.server.maxSize=10000000000
|
||||
|
||||
# Change these to make Druid faster
|
||||
druid.processing.buffer.sizeBytes=100000000
|
||||
druid.processing.numThreads=1
|
||||
|
||||
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize": 10000000000}]
|
||||
```
|
||||
|
||||
Production Configs
|
||||
------------------
|
||||
These production configs are using S3 as a deep store.
|
||||
|
||||
JVM settings:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx#{HEAP_MAX}g
|
||||
-Xms#{HEAP_MIN}g
|
||||
-XX:NewSize=#{NEW_SIZE}g
|
||||
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
|
||||
-Dcom.sun.management.jmxremote.port=17071
|
||||
-Dcom.sun.management.jmxremote.authenticate=false
|
||||
-Dcom.sun.management.jmxremote.ssl=false
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/historical/_default
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:#{DRUID_VERSION}"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.s3.accessKey=#{ACCESS_KEY}
|
||||
druid.s3.secretKey=#{SECRET_KEY}
|
||||
|
||||
druid.server.type=historical
|
||||
druid.server.maxSize=#{SERVER_MAXSIZE}
|
||||
druid.server.http.numThreads=50
|
||||
|
||||
druid.processing.buffer.sizeBytes=#{BUFFER_SIZE}
|
||||
druid.processing.numThreads=#{NUM_THREADS}
|
||||
|
||||
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": #{SERVER_MAXSIZE}}]
|
||||
|
||||
druid.request.logging.type=file
|
||||
druid.request.logging.dir=request_logs/
|
||||
|
||||
druid.monitoring.monitors=["io.druid.server.metrics.ServerMonitor", "com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
The historical module uses several of the default modules in [Configuration](Configuration.html) and has no unique configs of its own.
|
||||
|
|
|
@ -3,167 +3,6 @@ layout: doc_page
|
|||
---
|
||||
For general Indexing Service information, see [here](Indexing-Service.html).
|
||||
|
||||
Quick Start
|
||||
-----------
|
||||
|
||||
```
|
||||
io.druid.cli.Main server overlord
|
||||
```
|
||||
|
||||
With the following JVM configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
-Ddruid.host=localhost
|
||||
-Ddruid.port=8080
|
||||
-Ddruid.service=overlord
|
||||
|
||||
-Ddruid.zk.service.host=localhost
|
||||
|
||||
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
-Ddruid.db.connector.user=druid
|
||||
-Ddruid.db.connector.password=diurd
|
||||
|
||||
-Ddruid.selectors.indexing.serviceName=overlord
|
||||
-Ddruid.indexer.queue.startDelay=PT0M
|
||||
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
|
||||
-Ddruid.indexer.runner.startPort=8081
|
||||
-Ddruid.indexer.fork.property.druid.computation.buffer.size=268435456
|
||||
```
|
||||
|
||||
Production Configs
|
||||
------------------
|
||||
These production configs are using S3 as a deep store and running the indexing service in distributed mode.
|
||||
|
||||
JVM settings for both overlord and middle manager:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx#{HEAP_MAX}g
|
||||
-Xms#{HEAP_MIN}g
|
||||
-XX:NewSize=#{NEW_SIZE}g
|
||||
-XX:MaxNewSize=#{MAX_NEW_SIZE}g
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
|
||||
-Dcom.sun.management.jmxremote.port=17071
|
||||
-Dcom.sun.management.jmxremote.authenticate=false
|
||||
-Dcom.sun.management.jmxremote.ssl=false
|
||||
```
|
||||
|
||||
Runtime.properties for overlord:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/indexer
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.s3.accessKey=#{ACCESS_KEY}
|
||||
druid.s3.secretKey=#{SECRET_KEY}
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
|
||||
druid.db.connector.user=#{MYSQL_USER}
|
||||
druid.db.connector.password=#{MYSQL_PW}
|
||||
druid.db.connector.useValidationQuery=true
|
||||
druid.db.tables.base=prod
|
||||
|
||||
druid.indexer.autoscale.doAutoscale=true
|
||||
druid.indexer.autoscale.strategy=ec2
|
||||
druid.indexer.autoscale.workerIdleTimeout=PT90m
|
||||
druid.indexer.autoscale.terminatePeriod=PT5M
|
||||
druid.indexer.autoscale.workerVersion=#{WORKER_VERSION}
|
||||
|
||||
druid.indexer.firehoseId.prefix=druid:prod:chat
|
||||
druid.indexer.logs.type=s3
|
||||
druid.indexer.logs.s3Bucket=#{INDEXER_LOGS_BUCKET}
|
||||
druid.indexer.logs.s3Prefix=prod/logs/v1
|
||||
druid.indexer.runner.type=remote
|
||||
druid.indexer.runner.compressZnodes=true
|
||||
druid.indexer.runner.minWorkerVersion=#{WORKER_VERSION}
|
||||
druid.indexer.storage.type=db
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
Runtime.properties for middle manager:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/worker
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105","io.druid.extensions:druid-kafka-seven:0.6.105"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.s3.accessKey=#{ACCESS_KEY}
|
||||
druid.s3.secretKey=#{SECRET_KEY}
|
||||
|
||||
druid.indexer.logs.type=s3
|
||||
druid.indexer.logs.s3Bucket=#{INDEXER_LOGS_BUCKET}
|
||||
druid.indexer.logs.s3Prefix=prod/logs/v1
|
||||
druid.indexer.runner.javaOpts=-server -Xmx#{HEAP_MAX}g -Xms#{HEAP_MIN}g -XX:NewSize=#{NEW_SIZE}m -XX:MaxNewSize=#{MAX_NEW_SIZE}m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
|
||||
druid.indexer.runner.startPort=8081
|
||||
druid.indexer.runner.taskDir=/mnt/persistent/task/
|
||||
druid.indexer.task.taskDir=/mnt/persistent/task/
|
||||
druid.indexer.task.chathandler.type=announce
|
||||
|
||||
druid.indexer.firehoseId.prefix=druid:prod:chat
|
||||
|
||||
druid.indexer.fork.property.druid.indexer.hadoopWorkingPath=/tmp/druid-indexing
|
||||
druid.indexer.fork.property.druid.computation.buffer.size=#{BUFFER_SIZE}
|
||||
druid.indexer.fork.property.druid.processing.numThreads=#{NUM_WORKER_THREADS}
|
||||
druid.indexer.fork.property.druid.request.logging.type=file
|
||||
druid.indexer.fork.property.druid.request.logging.dir=request_logs/
|
||||
druid.indexer.fork.property.druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 0}]
|
||||
druid.indexer.fork.property.druid.storage.type=s3
|
||||
druid.indexer.fork.property.druid.storage.baseKey=prod/v1
|
||||
druid.indexer.fork.property.druid.storage.bucket=#{INDEXER_LOGS_BUCKET}
|
||||
druid.server.http.numThreads=20
|
||||
|
||||
druid.worker.capacity=#{NUM_WORKER_THREADS}
|
||||
druid.worker.ip=#{IP_ADDR}
|
||||
druid.worker.version=#{WORKER_VERSION}
|
||||
|
||||
druid.selectors.indexing.serviceName=druid:prod:indexer
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
#### Runtime Configuration
|
||||
|
||||
In addition to the configuration of some of the default modules in [Configuration](Configuration.html), the overlord has the following basic configs:
|
||||
|
|
|
@ -0,0 +1,389 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
Production Cluster Configuration
|
||||
================================
|
||||
|
||||
This production Druid cluster assumes that MySQL and Zookeeper are already set up. The deep storage that is used for examples is S3 and memcached is used for a distributed cache.
|
||||
|
||||
The nodes that respond to queries (Historical, Broker, and Middle manager nodes) will use as many cores as are available, depending on usage, so it is best to keep these on dedicated machines. The upper limit of effectively utilized cores is not well characterized yet and would depend on types of queries, query load, and the schema. Historical daemons should have a heap size of at least 1GB per core for normal usage, but could be squeezed into a smaller heap for testing. Since in-memory caching is essential for good performance, even more RAM is better. Broker nodes will use RAM for caching, so they do more than just route queries. SSDs are highly recommended for Historical nodes when not all data is loaded in available memory.
|
||||
|
||||
The nodes that are responsible for coordination (Coordinator and Overlord nodes) require much less processing.
|
||||
|
||||
The effective utilization of cores by Zookeeper, MySQL, and Coordinator nodes is likely to be between 1 and 2 for each process/daemon, so these could potentially share a machine with lots of cores. These daemons work with a heap size between 500MB and 1GB.
|
||||
|
||||
We'll use r3.8xlarge nodes for query facing nodes and m1.xlarge nodes for coordination nodes. The following examples work relatively well in production, however, a more optimized tuning for the nodes we selected and more optimal hardware for a Druid cluster are both definitely possible.
|
||||
|
||||
For general purposes of high availability, there should be at least 2 of every node type.
|
||||
|
||||
To setup a local Druid cluster, see [Simple Cluster Configuration](Simple-Cluster-Configuration.html).
|
||||
|
||||
### Overlord Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server overlord
|
||||
```
|
||||
|
||||
Hardware:
|
||||
|
||||
```
|
||||
m1.xlarge (Cores: 4, Memory: 15.0 GB)
|
||||
```
|
||||
|
||||
JVM Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx4g
|
||||
-Xms4g
|
||||
-XX:NewSize=256m
|
||||
-XX:MaxNewSize=256m
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/overlord
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.s3.accessKey=#{ACCESS_KEY}
|
||||
druid.s3.secretKey=#{SECRET_KEY}
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
|
||||
druid.db.connector.user=#{MYSQL_USER}
|
||||
druid.db.connector.password=#{MYSQL_PW}
|
||||
druid.db.connector.useValidationQuery=true
|
||||
druid.db.tables.base=prod
|
||||
|
||||
# Only required if you are autoscaling middle managers
|
||||
druid.indexer.autoscale.doAutoscale=true
|
||||
druid.indexer.autoscale.strategy=ec2
|
||||
druid.indexer.autoscale.workerIdleTimeout=PT90m
|
||||
druid.indexer.autoscale.terminatePeriod=PT5M
|
||||
druid.indexer.autoscale.workerVersion=#{WORKER_VERSION}
|
||||
|
||||
# Upload all task logs to deep storage
|
||||
druid.indexer.logs.type=s3
|
||||
druid.indexer.logs.s3Bucket=#{LOGS_BUCKET}
|
||||
druid.indexer.logs.s3Prefix=prod/logs/v1
|
||||
|
||||
# Run in remote mode
|
||||
druid.indexer.runner.type=remote
|
||||
druid.indexer.runner.compressZnodes=true
|
||||
druid.indexer.runner.minWorkerVersion=#{WORKER_VERSION}
|
||||
|
||||
# Store all task state in MySQL
|
||||
druid.indexer.storage.type=db
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
### MiddleManager Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server middleManager
|
||||
```
|
||||
|
||||
Hardware:
|
||||
|
||||
```
|
||||
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD)
|
||||
```
|
||||
|
||||
JVM Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx64m
|
||||
-Xms64m
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/middlemanager
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105","io.druid.extensions:druid-kafka-seven:0.6.105"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.s3.accessKey=#{ACCESS_KEY}
|
||||
druid.s3.secretKey=#{SECRET_KEY}
|
||||
|
||||
# Store task logs in deep storage
|
||||
druid.indexer.logs.type=s3
|
||||
druid.indexer.logs.s3Bucket=#{LOGS_BUCKET}
|
||||
druid.indexer.logs.s3Prefix=prod/logs/v1
|
||||
|
||||
# Dedicate more resources to peons
|
||||
druid.indexer.runner.javaOpts=-server -Xmx6g -Xms6g -XX:NewSize=256m -XX:MaxNewSize=256m -XX:+PrintGCDetails -XX:+PrintGCTimeStamps
|
||||
druid.indexer.runner.taskDir=/mnt/persistent/task/
|
||||
druid.indexer.task.taskDir=/mnt/persistent/task/
|
||||
druid.indexer.task.chathandler.type=announce
|
||||
|
||||
druid.indexer.fork.property.druid.indexer.hadoopWorkingPath=/tmp/druid-indexing
|
||||
druid.indexer.fork.property.druid.computation.buffer.size=536870912
|
||||
druid.indexer.fork.property.druid.processing.numThreads=3
|
||||
druid.indexer.fork.property.druid.request.logging.type=file
|
||||
druid.indexer.fork.property.druid.request.logging.dir=request_logs/
|
||||
druid.indexer.fork.property.druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 0}]
|
||||
druid.indexer.fork.property.druid.server.http.numThreads=50
|
||||
druid.indexer.fork.property.druid.storage.type=s3
|
||||
druid.indexer.fork.property.druid.storage.baseKey=prod/v1
|
||||
druid.indexer.fork.property.druid.storage.bucket=#{LOGS_BUCKET}
|
||||
|
||||
druid.worker.capacity=10
|
||||
druid.worker.ip=#{IP_ADDR}
|
||||
druid.worker.version=#{WORKER_VERSION}
|
||||
|
||||
druid.selectors.indexing.serviceName=druid:prod:overlord
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
### Coordinator Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server coordinator
|
||||
```
|
||||
|
||||
Hardware:
|
||||
|
||||
```
|
||||
m1.xlarge (Cores: 4, Memory: 15.0 GB)
|
||||
```
|
||||
|
||||
JVM Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx10g
|
||||
-Xms10g
|
||||
-XX:NewSize=512m
|
||||
-XX:MaxNewSize=512m
|
||||
-XX:+UseG1GC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/coordinator
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://#{MYSQL_URL}:3306/druid
|
||||
druid.db.connector.user=#{MYSQL_USER}
|
||||
druid.db.connector.password=#{MYSQL_PW}
|
||||
druid.db.connector.useValidationQuery=true
|
||||
druid.db.tables.base=prod
|
||||
|
||||
druid.selectors.indexing.serviceName=druid:prod:overlord
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor", "com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
### Historical Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server historical
|
||||
```
|
||||
|
||||
Hardware:
|
||||
|
||||
```
|
||||
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD)
|
||||
```
|
||||
|
||||
JVM Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx12g
|
||||
-Xms12g
|
||||
-XX:NewSize=6g
|
||||
-XX:MaxNewSize=6g
|
||||
-XX:MaxDirectMemorySize=32g
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/historical
|
||||
|
||||
druid.extensions.coordinates=["io.druid.extensions:druid-s3-extensions:0.6.105"]
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.s3.accessKey=#{ACCESS_KEY}
|
||||
druid.s3.secretKey=#{SECRET_KEY}
|
||||
|
||||
druid.server.maxSize=300000000000
|
||||
druid.server.http.numThreads=50
|
||||
|
||||
druid.processing.buffer.sizeBytes=1073741824
|
||||
druid.processing.numThreads=31
|
||||
|
||||
druid.segmentCache.locations=[{"path": "/mnt/persistent/zk_druid", "maxSize": 300000000000}]
|
||||
|
||||
druid.request.logging.type=file
|
||||
druid.request.logging.dir=request_logs/
|
||||
|
||||
druid.monitoring.monitors=["io.druid.server.metrics.ServerMonitor", "com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
||||
|
||||
### Broker Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server broker
|
||||
```
|
||||
|
||||
Hardware:
|
||||
|
||||
```
|
||||
r3.8xlarge (Cores: 32, Memory: 244 GB, SSD)
|
||||
```
|
||||
|
||||
JVM Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx50g
|
||||
-Xms50g
|
||||
-XX:NewSize=6g
|
||||
-XX:MaxNewSize=6g
|
||||
-XX:MaxDirectMemorySize=64g
|
||||
-XX:+UseConcMarkSweepGC
|
||||
-XX:+PrintGCDetails
|
||||
-XX:+PrintGCTimeStamps
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
-Djava.io.tmpdir=/mnt/tmp
|
||||
|
||||
-Dcom.sun.management.jmxremote.port=17071
|
||||
-Dcom.sun.management.jmxremote.authenticate=false
|
||||
-Dcom.sun.management.jmxremote.ssl=false
|
||||
```
|
||||
|
||||
Runtime.properties:
|
||||
|
||||
```
|
||||
druid.host=#{IP_ADDR}:8080
|
||||
druid.port=8080
|
||||
druid.service=druid/prod/broker
|
||||
|
||||
druid.zk.service.host=#{ZK_IPs}
|
||||
druid.zk.paths.base=/druid/prod
|
||||
|
||||
druid.discovery.curator.path=/prod/discovery
|
||||
|
||||
druid.broker.cache.type=memcached
|
||||
druid.broker.cache.hosts=#{MC_HOST1}:11211,#{MC_HOST2}:11211,#{MC_HOST3}:11211
|
||||
druid.broker.cache.expiration=2147483647
|
||||
druid.broker.cache.memcachedPrefix=d1
|
||||
druid.broker.http.numConnections=20
|
||||
druid.broker.http.readTimeout=PT5M
|
||||
|
||||
druid.processing.buffer.sizeBytes=2147483648
|
||||
druid.processing.numThreads=31
|
||||
|
||||
druid.server.http.numThreads=50
|
||||
|
||||
druid.request.logging.type=emitter
|
||||
druid.request.logging.feed=druid_requests
|
||||
|
||||
druid.monitoring.monitors=["com.metamx.metrics.SysMonitor","com.metamx.metrics.JvmMonitor"]
|
||||
|
||||
# Emit metrics over http
|
||||
druid.emitter=http
|
||||
druid.emitter.http.recipientBaseUrl=#{EMITTER_URL}
|
||||
|
||||
# If you choose to compress ZK announcements, you must do so for every node type
|
||||
druid.announcer.type=batch
|
||||
druid.curator.compress=true
|
||||
```
|
|
@ -0,0 +1,135 @@
|
|||
---
|
||||
layout: doc_page
|
||||
---
|
||||
Simple Cluster Configuration
|
||||
===============================
|
||||
|
||||
This simple Druid cluster configuration can be used for initially experimenting with Druid on your local machine. For a more realistic production Druid cluster, see [Production Cluster Configuration](Production-Cluster-Configuration.html).
|
||||
|
||||
### Overlord Node (Indexing Service)
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server overlord
|
||||
```
|
||||
|
||||
Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
-Ddruid.host=localhost
|
||||
-Ddruid.port=8080
|
||||
-Ddruid.service=overlord
|
||||
|
||||
-Ddruid.zk.service.host=localhost
|
||||
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.105"]
|
||||
|
||||
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
-Ddruid.db.connector.user=druid
|
||||
-Ddruid.db.connector.password=diurd
|
||||
|
||||
-Ddruid.selectors.indexing.serviceName=overlord
|
||||
-Ddruid.indexer.queue.startDelay=PT0M
|
||||
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
|
||||
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
|
||||
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
|
||||
```
|
||||
|
||||
This runs the indexing service in local mode, and can support real-time ingestion tasks (with one processing thread for queries).
|
||||
|
||||
### Coordinator Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server coordinator
|
||||
```
|
||||
|
||||
Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.host=localhost
|
||||
druid.service=coordinator
|
||||
druid.port=8082
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
|
||||
druid.db.connector.user=druid
|
||||
druid.db.connector.password=diurd
|
||||
|
||||
druid.coordinator.startDelay=PT70s
|
||||
```
|
||||
|
||||
This simple coordinator assumes local deep storage.
|
||||
|
||||
### Historical Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server historical
|
||||
```
|
||||
|
||||
Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.host=localhost
|
||||
druid.service=historical
|
||||
druid.port=8083
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.server.maxSize=10000000000
|
||||
|
||||
druid.processing.buffer.sizeBytes=100000000
|
||||
druid.processing.numThreads=1
|
||||
|
||||
druid.segmentCache.locations=[{"path": "/tmp/druid/indexCache", "maxSize": 10000000000}]
|
||||
```
|
||||
|
||||
This historical node will be able to load 10 GB of data and be able to process 1 segment at a time. Deep storage is assumed to be local storage here.
|
||||
|
||||
### Broker Node
|
||||
|
||||
Run:
|
||||
|
||||
```
|
||||
io.druid.cli.Main server broker
|
||||
```
|
||||
|
||||
Configuration:
|
||||
|
||||
```
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.host=localhost
|
||||
druid.service=broker
|
||||
druid.port=8084
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
|
||||
druid.processing.buffer.sizeBytes=100000000
|
||||
druid.processing.numThreads=1
|
||||
```
|
||||
|
||||
This simple broker will run groupBys in a single thread.
|
|
@ -85,21 +85,28 @@ config/overlord/runtime.properties
|
|||
The configurations for the overlord node are as follows:
|
||||
|
||||
```bash
|
||||
druid.host=localhost
|
||||
druid.port=8087
|
||||
druid.service=overlord
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
-Ddruid.host=localhost
|
||||
-Ddruid.port=8080
|
||||
-Ddruid.service=overlord
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
druid.db.connector.user=druid
|
||||
druid.db.connector.password=diurd
|
||||
-Ddruid.zk.service.host=localhost
|
||||
|
||||
druid.selectors.indexing.serviceName=overlord
|
||||
druid.indexer.queue.startDelay=PT0M
|
||||
druid.indexer.runner.javaOpts="-server -Xmx1g"
|
||||
druid.indexer.runner.startPort=8088
|
||||
druid.indexer.fork.property.druid.computation.buffer.size=268435456
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.105"]
|
||||
|
||||
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
-Ddruid.db.connector.user=druid
|
||||
-Ddruid.db.connector.password=diurd
|
||||
|
||||
-Ddruid.selectors.indexing.serviceName=overlord
|
||||
-Ddruid.indexer.queue.startDelay=PT0M
|
||||
-Ddruid.indexer.runner.javaOpts="-server -Xmx1g"
|
||||
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
|
||||
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
|
||||
```
|
||||
|
||||
If you are interested in reading more about these configurations, see [here](Indexing-Service.html).
|
||||
|
|
|
@ -14,13 +14,17 @@ h2. Getting Started
|
|||
* "Tutorial: Loading Your Data Part 2":./Tutorial:-Loading-Your-Data-Part-2.html
|
||||
* "Tutorial: All About Queries":./Tutorial:-All-About-Queries.html
|
||||
|
||||
h2. Booting a Druid Cluster
|
||||
* "Simple Cluster Configuration":Simple-Cluster-Configuration.html
|
||||
* "Production Cluster Configuration":Production-Cluster-Configuration.html
|
||||
|
||||
h2. Configuration
|
||||
* "Common Configuration":Configuration.html
|
||||
* "Realtime":Realtime-Config.html
|
||||
* "Indexing Service":Indexing-Service-Config.html
|
||||
* "Coordinator":Coordinator-Config.html
|
||||
* "Historical":Historical-Config.html
|
||||
* "Broker":Broker-Config.html
|
||||
* "Indexing Service":Indexing-Service-Config.html
|
||||
* "Realtime":Realtime-Config.html
|
||||
|
||||
h2. Data Ingestion
|
||||
* "Realtime":./Realtime-ingestion.html
|
||||
|
@ -32,7 +36,6 @@ h2. Data Ingestion
|
|||
|
||||
h2. Operations
|
||||
* "Extending Druid":./Modules.html
|
||||
* "Cluster Setup":./Cluster-setup.html
|
||||
* "Booting a Production Cluster":./Booting-a-production-cluster.html
|
||||
* "Performance FAQ":./Performance-FAQ.html
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
{
|
||||
"type": "index_realtime",
|
||||
"schema": {
|
||||
"dataSource": "wikipedia",
|
||||
"aggregators": [
|
||||
{
|
||||
"type": "count",
|
||||
"name": "count"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "added",
|
||||
"fieldName": "added"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "deleted",
|
||||
"fieldName": "deleted"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "delta",
|
||||
"fieldName": "delta"
|
||||
}
|
||||
],
|
||||
"indexGranularity": "none"
|
||||
},
|
||||
"fireDepartmentConfig": {
|
||||
"maxRowsInMemory": 500000,
|
||||
"intermediatePersistPeriod": "PT10m"
|
||||
},
|
||||
"firehose": {
|
||||
"type": "kafka-0.7.2",
|
||||
"consumerProps": {
|
||||
"zk.connect": "localhost:2181",
|
||||
"zk.connectiontimeout.ms": "15000",
|
||||
"zk.sessiontimeout.ms": "15000",
|
||||
"zk.synctime.ms": "5000",
|
||||
"groupid": "druid-example",
|
||||
"fetch.size": "1048586",
|
||||
"autooffset.reset": "largest",
|
||||
"autocommit.enable": "false"
|
||||
},
|
||||
"feed": "wikipedia",
|
||||
"parser": {
|
||||
"timestampSpec": {
|
||||
"column": "timestamp"
|
||||
},
|
||||
"data": {
|
||||
"format": "json",
|
||||
"dimensions": [
|
||||
"page",
|
||||
"language",
|
||||
"user",
|
||||
"unpatrolled",
|
||||
"newPage",
|
||||
"robot",
|
||||
"anonymous",
|
||||
"namespace",
|
||||
"continent",
|
||||
"country",
|
||||
"region",
|
||||
"city"
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"windowPeriod": "PT10m",
|
||||
"segmentGranularity": "hour",
|
||||
"rejectionPolicy": {
|
||||
"type": "test"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,71 @@
|
|||
{
|
||||
"type": "index_realtime",
|
||||
"schema": {
|
||||
"dataSource": "wikipedia",
|
||||
"aggregators": [
|
||||
{
|
||||
"type": "count",
|
||||
"name": "count"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "added",
|
||||
"fieldName": "added"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "deleted",
|
||||
"fieldName": "deleted"
|
||||
},
|
||||
{
|
||||
"type": "doubleSum",
|
||||
"name": "delta",
|
||||
"fieldName": "delta"
|
||||
}
|
||||
],
|
||||
"indexGranularity": "none"
|
||||
},
|
||||
"fireDepartmentConfig": {
|
||||
"maxRowsInMemory": 500000,
|
||||
"intermediatePersistPeriod": "PT10m"
|
||||
},
|
||||
"firehose": {
|
||||
"type": "irc",
|
||||
"nick": "wiki1234567890",
|
||||
"host": "irc.wikimedia.org",
|
||||
"channels": [
|
||||
"#en.wikipedia",
|
||||
"#fr.wikipedia",
|
||||
"#de.wikipedia",
|
||||
"#ja.wikipedia"
|
||||
],
|
||||
"decoder": {
|
||||
"type": "wikipedia",
|
||||
"namespaces": {
|
||||
"#en.wikipedia": {
|
||||
"_empty_": "main",
|
||||
"Category": "category",
|
||||
"$1 talk": "project talk",
|
||||
"Template talk": "template talk",
|
||||
"Help talk": "help talk",
|
||||
"Media": "media",
|
||||
"MediaWiki talk": "mediawiki talk",
|
||||
"File talk": "file talk",
|
||||
"MediaWiki": "mediawiki",
|
||||
"User": "user",
|
||||
"File": "file",
|
||||
"User talk": "user talk",
|
||||
"Template": "template",
|
||||
"Help": "help",
|
||||
"Special": "special",
|
||||
"Talk": "talk",
|
||||
"Category talk": "category talk"
|
||||
}
|
||||
}
|
||||
},
|
||||
"timeDimension": "timestamp",
|
||||
"timeFormat": "iso"
|
||||
},
|
||||
"windowPeriod": "PT10m",
|
||||
"segmentGranularity": "hour"
|
||||
}
|
|
@ -8,4 +8,4 @@ druid.db.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid
|
|||
druid.db.connector.user=druid
|
||||
druid.db.connector.password=diurd
|
||||
|
||||
druid.coordinator.startDelay=PT60s
|
||||
druid.coordinator.startDelay=PT70s
|
|
@ -1,17 +1,22 @@
|
|||
druid.host=localhost
|
||||
druid.port=8087
|
||||
druid.service=overlord
|
||||
-server
|
||||
-Xmx256m
|
||||
-Duser.timezone=UTC
|
||||
-Dfile.encoding=UTF-8
|
||||
|
||||
druid.zk.service.host=localhost
|
||||
-Ddruid.host=localhost
|
||||
-Ddruid.port=8080
|
||||
-Ddruid.service=overlord
|
||||
|
||||
druid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
druid.db.connector.user=druid
|
||||
druid.db.connector.password=diurd
|
||||
-Ddruid.zk.service.host=localhost
|
||||
|
||||
druid.selectors.indexing.serviceName=overlord
|
||||
druid.indexer.queue.startDelay=PT0M
|
||||
druid.indexer.runner.javaOpts="-server -Xmx1g"
|
||||
druid.indexer.runner.startPort=8088
|
||||
druid.indexer.fork.property.druid.computation.buffer.size=268435456
|
||||
druid.indexer.fork.property.druid.processing.numThreads=1
|
||||
-Ddruid.extensions.coordinates=["io.druid.extensions:druid-kafka-seven:0.6.105"]
|
||||
|
||||
-Ddruid.db.connector.connectURI=jdbc:mysql://localhost:3306/druid
|
||||
-Ddruid.db.connector.user=druid
|
||||
-Ddruid.db.connector.password=diurd
|
||||
|
||||
-Ddruid.selectors.indexing.serviceName=overlord
|
||||
-Ddruid.indexer.queue.startDelay=PT0M
|
||||
-Ddruid.indexer.runner.javaOpts="-server -Xmx256m"
|
||||
-Ddruid.indexer.fork.property.druid.processing.numThreads=1
|
||||
-Ddruid.indexer.fork.property.druid.computation.buffer.size=100000000
|
|
@ -33,7 +33,6 @@ import com.metamx.common.IAE;
|
|||
import com.metamx.common.ISE;
|
||||
import com.metamx.common.logger.Logger;
|
||||
import io.druid.data.input.InputRow;
|
||||
import io.druid.data.input.impl.SpatialDimensionSchema;
|
||||
import io.druid.data.input.impl.StringInputRowParser;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
import io.druid.segment.IndexIO;
|
||||
|
@ -61,7 +60,6 @@ import org.apache.hadoop.mapreduce.Job;
|
|||
import org.apache.hadoop.mapreduce.JobContext;
|
||||
import org.apache.hadoop.mapreduce.Partitioner;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
|
||||
|
@ -612,16 +610,10 @@ public class IndexGeneratorJob implements Jobby
|
|||
|
||||
private IncrementalIndex makeIncrementalIndex(Bucket theBucket, AggregatorFactory[] aggs)
|
||||
{
|
||||
List<SpatialDimensionSchema> spatialDimensionSchemas = config.getSchema().getDataSchema().getParser() == null
|
||||
? Lists.<SpatialDimensionSchema>newArrayList()
|
||||
: config.getSchema().getDataSchema().getParser()
|
||||
.getParseSpec()
|
||||
.getDimensionsSpec()
|
||||
.getSpatialDimensions();
|
||||
return new IncrementalIndex(
|
||||
new IncrementalIndexSchema.Builder()
|
||||
.withMinTimestamp(theBucket.time.getMillis())
|
||||
.withSpatialDimensions(spatialDimensionSchemas)
|
||||
.withSpatialDimensions(config.getSchema().getDataSchema().getParser())
|
||||
.withQueryGranularity(config.getSchema().getDataSchema().getGranularitySpec().getQueryGranularity())
|
||||
.withMetrics(aggs)
|
||||
.build()
|
||||
|
|
|
@ -53,7 +53,7 @@ public class ForkingTaskRunnerConfig
|
|||
@JsonProperty
|
||||
@Min(1024)
|
||||
@Max(65535)
|
||||
private int startPort = 8080;
|
||||
private int startPort = 8081;
|
||||
|
||||
@JsonProperty
|
||||
@NotNull
|
||||
|
|
|
@ -22,6 +22,7 @@ package io.druid.segment.incremental;
|
|||
import com.google.common.base.Function;
|
||||
import com.google.common.collect.Iterables;
|
||||
import com.google.common.collect.Lists;
|
||||
import io.druid.data.input.impl.InputRowParser;
|
||||
import io.druid.data.input.impl.SpatialDimensionSchema;
|
||||
import io.druid.granularity.QueryGranularity;
|
||||
import io.druid.query.aggregation.AggregatorFactory;
|
||||
|
@ -126,6 +127,20 @@ public class IncrementalIndexSchema
|
|||
return this;
|
||||
}
|
||||
|
||||
public Builder withSpatialDimensions(InputRowParser parser)
|
||||
{
|
||||
if (parser != null
|
||||
&& parser.getParseSpec() != null
|
||||
&& parser.getParseSpec().getDimensionsSpec() != null
|
||||
&& parser.getParseSpec().getDimensionsSpec().getSpatialDimensions() != null) {
|
||||
this.spatialDimensions = parser.getParseSpec().getDimensionsSpec().getSpatialDimensions();
|
||||
} else {
|
||||
this.spatialDimensions = Lists.newArrayList();
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder withSpatialDimensions(List<SpatialDimensionSchema> spatialDimensions)
|
||||
{
|
||||
this.spatialDimensions = spatialDimensions;
|
||||
|
|
|
@ -86,8 +86,8 @@ public class DruidProcessingModule implements Module
|
|||
if (maxDirectMemory < memoryNeeded) {
|
||||
throw new ProvisionException(
|
||||
String.format(
|
||||
"Not enough direct memory. Please adjust -XX:MaxDirectMemorySize, druid.computation.buffer.size, or druid.processing.numThreads: "
|
||||
+ "maxDirectMemory[%,d], memoryNeeded[%,d] = druid.computation.buffer.size[%,d] * ( druid.processing.numThreads[%,d] + 1 )",
|
||||
"Not enough direct memory. Please adjust -XX:MaxDirectMemorySize, druid.processing.buffer.sizeBytes, or druid.processing.numThreads: "
|
||||
+ "maxDirectMemory[%,d], memoryNeeded[%,d] = druid.processing.buffer.sizeBytes[%,d] * ( druid.processing.numThreads[%,d] + 1 )",
|
||||
maxDirectMemory,
|
||||
memoryNeeded,
|
||||
config.intermediateComputeSizeBytes(),
|
||||
|
|
|
@ -25,6 +25,7 @@ import com.fasterxml.jackson.databind.module.SimpleModule;
|
|||
import com.google.inject.Binder;
|
||||
import io.druid.data.input.ProtoBufInputRowParser;
|
||||
import io.druid.initialization.DruidModule;
|
||||
import io.druid.segment.realtime.firehose.IrcParser;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
@ -44,7 +45,8 @@ public class ParsersModule implements DruidModule
|
|||
return Arrays.<Module>asList(
|
||||
new SimpleModule("ParsersModule")
|
||||
.registerSubtypes(
|
||||
new NamedType(ProtoBufInputRowParser.class, "protobuf")
|
||||
new NamedType(ProtoBufInputRowParser.class, "protobuf"),
|
||||
new NamedType(IrcParser.class, "irc")
|
||||
)
|
||||
);
|
||||
}
|
||||
|
|
|
@ -106,6 +106,7 @@ public class IrcFirehoseFactory implements FirehoseFactory<IrcParser>
|
|||
private final String nick;
|
||||
private final String host;
|
||||
private final List<String> channels;
|
||||
private final IrcDecoder decoder;
|
||||
private final IrcParser parser;
|
||||
|
||||
@JsonCreator
|
||||
|
@ -119,9 +120,34 @@ public class IrcFirehoseFactory implements FirehoseFactory<IrcParser>
|
|||
this.nick = nick;
|
||||
this.host = host;
|
||||
this.channels = channels;
|
||||
this.decoder = decoder;
|
||||
this.parser = new IrcParser(decoder);
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public String getNick()
|
||||
{
|
||||
return nick;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public String getHost()
|
||||
{
|
||||
return host;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public List<String> getChannels()
|
||||
{
|
||||
return channels;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public IrcDecoder getDecoder()
|
||||
{
|
||||
return decoder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Firehose connect(final IrcParser firehoseParser) throws IOException
|
||||
{
|
||||
|
|
|
@ -19,25 +19,38 @@
|
|||
|
||||
package io.druid.segment.realtime.firehose;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||
import com.ircclouds.irc.api.domain.messages.ChannelPrivMsg;
|
||||
import com.metamx.common.Pair;
|
||||
import com.metamx.common.exception.FormattedException;
|
||||
import io.druid.data.input.InputRow;
|
||||
import io.druid.data.input.impl.DimensionsSpec;
|
||||
import io.druid.data.input.impl.InputRowParser;
|
||||
import io.druid.data.input.impl.ParseSpec;
|
||||
import io.druid.data.input.impl.TimestampSpec;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
/**
|
||||
*/
|
||||
@JsonTypeName("protoBuf")
|
||||
public class IrcParser implements InputRowParser<Pair<DateTime, ChannelPrivMsg>>
|
||||
{
|
||||
private final IrcDecoder decoder;
|
||||
|
||||
public IrcParser(IrcDecoder decoder)
|
||||
@JsonCreator
|
||||
public IrcParser(@JsonProperty("decoder") IrcDecoder decoder)
|
||||
{
|
||||
this.decoder = decoder;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public IrcDecoder getDecoder()
|
||||
{
|
||||
return decoder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputRow parse(Pair<DateTime, ChannelPrivMsg> msg) throws FormattedException
|
||||
{
|
||||
|
@ -47,7 +60,7 @@ public class IrcParser implements InputRowParser<Pair<DateTime, ChannelPrivMsg>>
|
|||
@Override
|
||||
public ParseSpec getParseSpec()
|
||||
{
|
||||
throw new UnsupportedOperationException();
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -73,6 +73,7 @@ class WikipediaIrcDecoder implements IrcDecoder
|
|||
);
|
||||
|
||||
final Map<String, Map<String, String>> namespaces;
|
||||
final String geoIpDatabase;
|
||||
|
||||
public WikipediaIrcDecoder( Map<String, Map<String, String>> namespaces) {
|
||||
this(namespaces, null);
|
||||
|
@ -86,7 +87,7 @@ class WikipediaIrcDecoder implements IrcDecoder
|
|||
namespaces = Maps.newHashMap();
|
||||
}
|
||||
this.namespaces = namespaces;
|
||||
|
||||
this.geoIpDatabase = geoIpDatabase;
|
||||
|
||||
File geoDb;
|
||||
if(geoIpDatabase != null) {
|
||||
|
@ -116,6 +117,18 @@ class WikipediaIrcDecoder implements IrcDecoder
|
|||
}
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public Map<String, Map<String, String>> getNamespaces()
|
||||
{
|
||||
return namespaces;
|
||||
}
|
||||
|
||||
@JsonProperty
|
||||
public String getGeoIpDatabase()
|
||||
{
|
||||
return geoIpDatabase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public InputRow decodeMessage(final DateTime timestamp, String channel, String msg)
|
||||
{
|
||||
|
|
|
@ -177,17 +177,11 @@ public class Sink implements Iterable<FireHydrant>
|
|||
|
||||
private FireHydrant makeNewCurrIndex(long minTimestamp, DataSchema schema)
|
||||
{
|
||||
List<SpatialDimensionSchema> spatialDimensionSchemas = schema.getParser() == null
|
||||
? Lists.<SpatialDimensionSchema>newArrayList()
|
||||
: schema.getParser()
|
||||
.getParseSpec()
|
||||
.getDimensionsSpec()
|
||||
.getSpatialDimensions();
|
||||
IncrementalIndex newIndex = new IncrementalIndex(
|
||||
new IncrementalIndexSchema.Builder()
|
||||
.withMinTimestamp(minTimestamp)
|
||||
.withQueryGranularity(schema.getGranularitySpec().getQueryGranularity())
|
||||
.withSpatialDimensions(spatialDimensionSchemas)
|
||||
.withSpatialDimensions(schema.getParser())
|
||||
.withMetrics(schema.getAggregators())
|
||||
.build()
|
||||
);
|
||||
|
|
Loading…
Reference in New Issue